Convert OpenMS MSstatsTMT report into the required input format for MSstatsTMT.

OpenMStoMSstatsTMTFormat(
  input,
  useUniquePeptide = TRUE,
  rmPSM_withMissing_withinRun = FALSE,
  rmPSM_withfewMea_withinRun = TRUE,
  rmProtein_with1Feature = FALSE,
  summaryforMultiplePSMs = sum
)

Arguments

input

MSstatsTMT report from OpenMS

useUniquePeptide

TRUE (default) removes peptides that are assigned to more than one protein. We assume that unique peptides are used for each protein.

rmPSM_withMissing_withinRun

TRUE will remove PSMs with any missing value within each Run. Default is FALSE.

rmPSM_withfewMea_withinRun

Only used when rmPSM_withMissing_withinRun = FALSE. TRUE (default) will remove the features that have only 1 or 2 measurements within each Run.

rmProtein_with1Feature

TRUE will remove the proteins which have only 1 peptide and charge. Default is FALSE.

summaryforMultiplePSMs

sum (default) or max. When there are multiple measurements for a given feature in a given run, select the feature with the largest summation or maximal value.

Value

input for proteinSummarization function

Examples

head(raw.om)
#> RetentionTime ProteinName PeptideSequence Charge #> 1 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> 2 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> 3 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> 4 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> 5 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> 6 2924.491 sp|P11679|K2C8_MOUSE .(TMT6plex)AEAETMYQIK(TMT6plex) 2 #> Channel Condition BioReplicate Run Mixture TechRepMixture Fraction #> 1 1 Long_LF 1 1_1_3 1 1_1 3 #> 2 2 Long_LF 2 1_1_3 1 1_1 3 #> 3 3 Long_M 3 1_1_3 1 1_1 3 #> 4 6 Long_M 6 1_1_3 1 1_1 3 #> 5 5 Norm 5 1_1_3 1 1_1 3 #> 6 9 Norm 9 1_1_3 1 1_1 3 #> Intensity #> 1 5727.319 #> 2 6985.365 #> 3 4553.897 #> 4 5937.782 #> 5 5151.292 #> 6 6800.128 #> Reference #> 1 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324 #> 2 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324 #> 3 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324 #> 4 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324 #> 5 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324 #> 6 PAMI-176_Mouse_A-J_TMT_40ug_22pctACN_25cm_120min_20160223_OT.mzML_controllerType=0 controllerNumber=1 scan=11324
input.om <- OpenMStoMSstatsTMTFormat(raw.om)
#> Joining, by = c("RetentionTime", "ProteinName", "PeptideSequence", "Charge", "Run", "Reference")
#> ** PSMs, that have all zero intensities across channels in each run, are removed.
#> Joining, by = c("RetentionTime", "ProteinName", "PeptideSequence", "Charge", "Run", "Reference")
#> ** 2 features have 1 or 2 intensities across runs are removed.
#> Joining, by = c("Run", "Channel")
#> ** PSMs have been aggregated to peptide ions.
#> ** For peptides overlapped between fractions of 2_2_2, use the fraction with maximal average abundance.
#> ** For peptides overlapped between fractions of 3_3_3, use the fraction with maximal average abundance.
#> ** Fractions belonging to same mixture have been combined.
head(input.om)
#> ProteinName PeptideSequence Charge #> 1 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> 2 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> 3 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> 4 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> 5 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> 6 sp|O08663|MAP2_MOUSE .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR 2 #> PSM Mixture TechRepMixture Run #> 1 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> 2 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> 3 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> 4 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> 5 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> 6 .(TMT6plex)GQEC(Carbamidomethyl)EYPPTQDGR_2 1 1_1 1_1_1 #> Channel Condition BioReplicate Intensity #> 1 1 Long_LF 1 18748.36 #> 2 10 Short_LF 10 15084.31 #> 3 2 Long_LF 2 19591.20 #> 4 3 Long_M 3 17800.54 #> 5 4 Short_LF 4 21316.78 #> 6 5 Norm 5 17607.60