--- tags: Source --- # 1.2. IPO_RTCorrection.R # TEAM: this script was done for RP library(IPO) library(xcms) library(CMSITools) library(tidyverse) library(stringi) register(bpstart(MulticoreParam(12))) source('/tmp/src/getFilesMAX.R') # extracting the script parameters # output file name example: IPO_parameters_MAX_RN-NEG_sQC_B1 # example filepath <- "/tmp/IPO_PP/B1/" # batch number example: 1 args <- commandArgs(trailingOnly = TRUE) filename <- args[1] dirpath <- args[2] polarity <- args[3] filepath <- file.path(dirpath, filename) # input data as exposed to the container, for this step it is batch specific # example: /proj/sens2018586/MetabolomicsData/RP-NEG/B1 files_location <- "/data/" #Input MS data------------------------------------------------------------------------------ # Here Elise went batch specific, while Agneta loaded everything using getFiles instead of getFilesMax # I am guessing that Elise's dataset works with the old version, while Agneta's script was not updated LCMS_data <- getFilesMAX(files_location) # for some reason Agneta skipped this while Elise didn't. Maybe the new function already does it but whatever. #LCMS_data <- LCMS_data %>% cleanFiles(c("WASH","sQCcond","blank", "cond", "iterative")) #locates the raw data. if (polarity == "RN"){ sQCs <- LCMS_data %>% getRP() %>% getNEG() %>% getQC("sQC") #define which samples to use, here QC some negative mode. } if (polarity == "RP"){ sQCs <- LCMS_data %>% getRP() %>% getPOS() %>% getQC("sQC") #define which samples to use, here QC some negative mode. } # TODO: Do I need to make an array of QC samples, as Agneta tried? Kalle said no. # But, running on all samples took too much time. sQCs <- as.data.frame(sQCs %>% group_by(batch) %>% sample_n(5)) saveRDS(sQCs, file=paste(filepath, "_5rand_sQCs.rds", sep = "")) filenames=extractNames(sQCs) nCore=12 param=MulticoreParam(workers=nCore) #Modify number of workers to suit your computer setup and number of samples.For MAX 15 is fine. #pd <- data.frame(sample_name=sub(sQCs$filename, pattern=".mzML", replacement="", fixed=T), sample_group=sQCs$sampleGroup, stringsAsFactors = F) # When running the RT correction and groping in XCMS, you need to specify for the argument 'sample_group...' that it should only use the sQCs. # Here it is not needed, since it has been specified above only to run sQC samples. # NOTE: I am not sure if the MS data should be batch annotated #RPPOS <- readMSData(files=filenamesPOS, pdata=new("NAnnotatedDataFrame", pd), mode='onDisk') msdata <- readMSData(files=filenames, mode='onDisk') #Using the parameters obtained from IPO peak picking, in their average values #TODO: should mzdiff be averaged on absolute values? cwp=CentWaveParam(peakwidth=c(8.05,61.11),ppm=20.68, mzdiff=0.00174353, prefilter=c(3,1000), noise=500) #go to IPO peak peaking for parameter definitions. xdata <- findChromPeaks(msdata, param=cwp, BPPARAM=param) #IPO for retention time correction xdataret <- as(xdata, "xcmsSet") retcorGroupParameters <- getDefaultRetGroupStartingParams() #Params = items below with two values will be optimized, the first defines the lower value, the second the upper value. retcorGroupParameters$minfrac <- 0.8 #defines in how many samples a feature should apear, here in 80% of the samples, this should be lower in XCMS (0.4) when running all samples retcorGroupParameters$gapInit <- c(0.2, 0.7) #penalties for gap openings?? retcorGroupParameters$gapExtend <- c(2,3) #penalties for gap enlargements?? retcorGroupParameters$response <- c(1,10) #?? retcorGroupParameters$profStep <- c(0.8,1.1) #defines the widths (in m/z dimension) of the profiles?? (named binSize when using adjustRtime function in xcms versions >= 3.0) retcorGroupParameters$mzwid <- c(0.015, 0.035) #the allowed variation of m/z within a group of features (named binSize when using groupChromPeaks function in xcms versions >= 3.0) resultRetcorGroup <- optimizeRetGroup(xset=xdataret, params=retcorGroupParameters, nSlaves=1,#old statement that is not used anymore, but has to be there. subdir="PLOT", plot=TRUE) save(resultRetcorGroup, file=paste(filepath, "_IPO_RT.rda", sep = "")) dev.off() best_settings <- resultRetcorGroup$best_settings sink(paste(filepath, "_RT_best_settings.txt", sep = ""), append=TRUE) best_settings sink()