---
tags: Source
---
# 1.2. IPO_RTCorrection.R
# TEAM: this script was done for RP
library(IPO)
library(xcms)
library(CMSITools)
library(tidyverse)
library(stringi)
register(bpstart(MulticoreParam(12)))
source('/tmp/src/getFilesMAX.R')
# extracting the script parameters
# output file name example: IPO_parameters_MAX_RN-NEG_sQC_B1
# example filepath <- "/tmp/IPO_PP/B1/"
# batch number example: 1
args <- commandArgs(trailingOnly = TRUE)
filename <- args[1]
dirpath <- args[2]
polarity <- args[3]
filepath <- file.path(dirpath, filename)
# input data as exposed to the container, for this step it is batch specific
# example: /proj/sens2018586/MetabolomicsData/RP-NEG/B1
files_location <- "/data/"
#Input MS data------------------------------------------------------------------------------
# Here Elise went batch specific, while Agneta loaded everything using getFiles instead of getFilesMax
# I am guessing that Elise's dataset works with the old version, while Agneta's script was not updated
LCMS_data <- getFilesMAX(files_location)
# for some reason Agneta skipped this while Elise didn't. Maybe the new function already does it but whatever.
#LCMS_data <- LCMS_data %>% cleanFiles(c("WASH","sQCcond","blank", "cond", "iterative")) #locates the raw data.
if (polarity == "RN"){
sQCs <- LCMS_data %>% getRP() %>% getNEG() %>% getQC("sQC") #define which samples to use, here QC some negative mode.
}
if (polarity == "RP"){
sQCs <- LCMS_data %>% getRP() %>% getPOS() %>% getQC("sQC") #define which samples to use, here QC some negative mode.
}
# TODO: Do I need to make an array of QC samples, as Agneta tried? Kalle said no.
# But, running on all samples took too much time.
sQCs <- as.data.frame(sQCs %>% group_by(batch) %>% sample_n(5))
saveRDS(sQCs, file=paste(filepath, "_5rand_sQCs.rds", sep = ""))
filenames=extractNames(sQCs)
nCore=12
param=MulticoreParam(workers=nCore) #Modify number of workers to suit your computer setup and number of samples.For MAX 15 is fine.
#pd <- data.frame(sample_name=sub(sQCs$filename, pattern=".mzML", replacement="", fixed=T), sample_group=sQCs$sampleGroup, stringsAsFactors = F)
# When running the RT correction and groping in XCMS, you need to specify for the argument 'sample_group...' that it should only use the sQCs.
# Here it is not needed, since it has been specified above only to run sQC samples.
# NOTE: I am not sure if the MS data should be batch annotated
#RPPOS <- readMSData(files=filenamesPOS, pdata=new("NAnnotatedDataFrame", pd), mode='onDisk')
msdata <- readMSData(files=filenames, mode='onDisk')
#Using the parameters obtained from IPO peak picking, in their average values
#TODO: should mzdiff be averaged on absolute values?
cwp=CentWaveParam(peakwidth=c(8.05,61.11),ppm=20.68, mzdiff=0.00174353, prefilter=c(3,1000), noise=500) #go to IPO peak peaking for parameter definitions.
xdata <- findChromPeaks(msdata, param=cwp, BPPARAM=param)
#IPO for retention time correction
xdataret <- as(xdata, "xcmsSet")
retcorGroupParameters <- getDefaultRetGroupStartingParams() #Params = items below with two values will be optimized, the first defines the lower value, the second the upper value.
retcorGroupParameters$minfrac <- 0.8 #defines in how many samples a feature should apear, here in 80% of the samples, this should be lower in XCMS (0.4) when running all samples
retcorGroupParameters$gapInit <- c(0.2, 0.7) #penalties for gap openings??
retcorGroupParameters$gapExtend <- c(2,3) #penalties for gap enlargements??
retcorGroupParameters$response <- c(1,10) #??
retcorGroupParameters$profStep <- c(0.8,1.1) #defines the widths (in m/z dimension) of the profiles?? (named binSize when using adjustRtime function in xcms versions >= 3.0)
retcorGroupParameters$mzwid <- c(0.015, 0.035) #the allowed variation of m/z within a group of features (named binSize when using groupChromPeaks function in xcms versions >= 3.0)
resultRetcorGroup <- optimizeRetGroup(xset=xdataret,
params=retcorGroupParameters,
nSlaves=1,#old statement that is not used anymore, but has to be there.
subdir="PLOT",
plot=TRUE)
save(resultRetcorGroup, file=paste(filepath, "_IPO_RT.rda", sep = ""))
dev.off()
best_settings <- resultRetcorGroup$best_settings
sink(paste(filepath, "_RT_best_settings.txt", sep = ""), append=TRUE)
best_settings
sink()