Chromatogram.R

# ---
# tags: Source
# ---
# Chromatogram.R
# (Note front matter kept as comments so that the file parses as R.)

#load packages#
library(xcms)
library(CMSITools)
library(tidyverse)
library(stringi)

# Register a started multicore backend with 12 workers for BiocParallel.
register(bpstart(MulticoreParam(12)))

# inserting the file location here instead as getFilesMax
files_location <- "/data/"

#import raw LCMS files, by the changed getFiles function#
#source('getFilesMAX.R')

# Collect LCMS files below `path` and parse their file names into metadata.
#
# File names are split on "_" and are expected to yield six tokens of the
# form date_batch_chromatography_polarity_sample_injection.fileformat.
# A name with seven tokens is assumed to have one "_" inside the sample
# field; tokens 5 and 6 are then merged with "-".
#
# Arguments:
#   path      Folder searched recursively. If missing, a file chooser is
#             opened and the folder of the chosen file is used.
#   pattern   Fixed (non-regex) string; only files whose full path contains
#             it are selected. Note that it is matched against the whole
#             path, not only the file name.
#   complete  If TRUE the result is passed through getComplete(), which is
#             not defined in this file (presumably provided by CMSITools).
#   filesize  If TRUE a `sizeMB` column with the file size is added.
#
# Returns a data.frame with the columns date, batch, chromatography,
# polarity, sample, injection, fileformat, filename, folder, path, fullname
# (and optionally sizeMB), ordered by chromatography, polarity, date, batch
# and injection. Rows of files that do not follow the naming scheme carry NA
# in the parsed columns.
#
# Stops with an error if no file matches `pattern`, or if no matching file
# follows the naming scheme.
getFilesMAX=function(path,pattern='.mzML',complete=TRUE,filesize=FALSE) {
  cat('\n\n')
  message('Double-click on a file in a folder with LCMS files -> Choose all files in folder & subdirectories\n')
  cat('Only files containing the specified pattern (".mzML" as default) are selected.\n')
  cat('For other formats (or file naming patterns), specify e.g. pattern=".cdf" in the function call\n')
  cat('If you want to start picking from a folder where no LCMS files are present,\n')
  cat('you can put a dummy file ("dummy.txt" or similar) in that folder and double click on it.\n')
  cat('\n\n')

  if (missing(path)) {
    path <- dirname(file.choose())
  }

  files <- list.files(path, recursive = TRUE, full.names = TRUE)
  files <- grep(pattern, files, value = TRUE, fixed = TRUE)
  if (length(files) == 0) {
    # Previously this fell through to `1:0` and died with a subscript error.
    stop("No files containing '", pattern, "' were found under '", path, "'.",
         call. = FALSE)
  }

  paths <- dirname(files)
  # Name of the folder that directly contains each file.
  folders <- vapply(strsplit(paths, "/"), function(x) x[length(x)],
                    character(1))
  filenames <- basename(files)

  # Tokenise the file names; merge tokens 5 and 6 when there are 7 tokens.
  tokens <- lapply(strsplit(filenames, split = '_'), function(string) {
    if (length(string) == 7) {
      string[5] <- paste(string[5:6], collapse = '-')
      string <- string[-6]
    }
    string
  })

  ncols <- 6
  # A file conforms only if it has exactly `ncols` tokens. This is checked
  # for every file, also when all files share the same (wrong) token count.
  keepers <- lengths(tokens) == ncols
  if (!any(keepers)) {
    stop("None of the ", length(files), " selected files has the expected ",
         ncols, " '_'-separated fields in its name.", call. = FALSE)
  }
  if (!all(keepers)) {
    warning('\nNon-uniform filename strategy!')
    if (complete) {
      warning('\nNon-conforming samples were omitted.')
    } else {
      warning('\nNon-conforming samples were kept in the output.')
      warning('\nUse getComplete() to filter out non-conforming samples.')
    }
  }

  # One row per conforming file, one column per token.
  fields <- matrix(unlist(tokens[keepers]), nrow = sum(keepers), byrow = TRUE)
  # The last token is "<injection>.<fileformat>"; the two-column reshape
  # relies on it containing exactly one ".".
  last_parts <- matrix(
    unlist(strsplit(fields[, ncols], split = '.', fixed = TRUE)),
    ncol = 2, byrow = TRUE
  )

  # NA-filled matrix with one row per file; only conforming rows are filled.
  out <- matrix(nrow = length(files),
                ncol = ncol(fields) + ncol(last_parts) - 1)
  out[keepers, ] <- cbind(fields[, -ncols, drop = FALSE], last_parts)
  out <- as.data.frame(cbind(out, filenames, folders, paths),
                       stringsAsFactors = FALSE)
  colnames(out) <- c('date','batch','chromatography','polarity','sample','injection','fileformat','filename','folder','path')

  out$injection <- as.numeric(out$injection)
  out$batch <- factor(out$batch)
  out$chromatography <- factor(out$chromatography)
  out$polarity <- factor(out$polarity)
  out$fullname <- files
  if (filesize) {
    out$sizeMB <- file.info(files)$size / 1024^2
  }
  if (complete) {
    out <- getComplete(out)
  }

  out[order(out$chromatography, out$polarity, out$date, out$batch,
            out$injection), ]
}

LCMS_data <- getFilesMAX(files_location)
save(LCMS_data, file='/tmp/LCMS_data.rda')

#define groups#
# Logical masks are used instead of `x[-c(grep(...), grep(...))]`: with no
# QC match the negative index would be empty, select nothing, and leave
# every group as NA.
is_sqc <- grepl("sQC", LCMS_data$sample)
is_ltqc <- grepl("ltQC", LCMS_data$sample)
LCMS_data$group <- rep("sample", nrow(LCMS_data))
LCMS_data$group[is_sqc] <- "sQC"
# Assigned last so that "ltQC" wins if a name matches both patterns.
LCMS_data$group[is_ltqc] <- "ltQC"

# extractNames() is not defined in this file (presumably from CMSITools);
# its result is used below as the vector of file paths to read.
LCMS_data_names <- LCMS_data %>% extractNames()
#LCMS_data_after_names <- LCMS_data_after %>% extractNames()

#---------------------------------create phenodata data frame and load object---------------------------------------------------------------------------
pd <- data.frame(
  sample_name = sub(basename(LCMS_data_names), pattern = ".mzML",
                    replacement = "", fixed = TRUE),
  sample_group = LCMS_data$group,
  stringsAsFactors = FALSE)

raw_data <- readMSData(files = LCMS_data_names,
                       pdata = new("NAnnotatedDataFrame", pd),
                       mode = "onDisk")

#----------------------------------visual data inspection---------------------------------------------------------------------------
#organise mz by file#
mzs <- mz(raw_data)

#split the list by file#
mzs_by_file <- split(mzs, f = fromFile(raw_data))
length(mzs_by_file)

#plot of base peak chromatograms#
# aggregationFun = "max" keeps the maximum intensity per spectrum.
bpis <- chromatogram(raw_data, aggregationFun = "max")
jpeg("/tmp/MAX_RP-NEG_bpc.jpg")
# Draw the base peak chromatograms on the currently open graphics device.
plot(bpis)
#plotbpc(bpis)
# Close the device so that the image file is flushed to disk. On a single
# physical line this call and the saveRDS() below sat behind the "#" of the
# comment above and were never executed.
dev.off()
# Store the chromatogram object so it can be reloaded without recomputing.
saveRDS(bpis, "/tmp/MAX_bpis_RP-NEG_bpc.rds")