---
tags: Source
---
# Chromatogram.R
#load packages#
library(xcms)
library(CMSITools)
library(tidyverse)
library(stringi)
# Start a MulticoreParam backend with 12 workers and register it as the
# default parallel backend for the packages loaded above.
register(bpstart(MulticoreParam(12)))
# Folder holding the raw LCMS files. It is passed to getFilesMAX() further
# down, so the interactive file chooser inside that function is not used.
files_location <- "/data/"
#import raw LCMS files using getFilesMAX, a modified version of the getFiles function#
#source('getFilesMAX.R')
#' List LCMS files under a folder and parse sample metadata from their names
#'
#' File names are expected to consist of six "_"-separated fields:
#' date, batch, chromatography, polarity, sample and "injection.fileformat".
#' A name with seven fields is treated as having one extra "_" inside the
#' sample field: fields 5 and 6 are joined with "-".
#'
#' @param path Folder that is searched recursively. If missing, a file chooser
#'   is opened and the folder of the chosen file is used.
#' @param pattern Fixed (non-regex) string that the full file path must contain.
#' @param complete If TRUE, the result is passed through getComplete() before
#'   sorting (getComplete() is defined outside this file, presumably in
#'   CMSITools - confirm).
#' @param filesize If TRUE, a `sizeMB` column (file size in bytes / 1024^2) is
#'   added.
#' @return A data.frame with one row per matching file and the columns date,
#'   batch, chromatography, polarity, sample, injection, fileformat, filename,
#'   folder, path and fullname (plus sizeMB if requested), ordered by
#'   chromatography, polarity, date, batch and injection. Before getComplete()
#'   is applied, files whose name does not have six fields carry NA in the
#'   parsed columns.
#' Stops with an error when no file matches `pattern`.
getFilesMAX <- function(path, pattern = '.mzML', complete = TRUE, filesize = FALSE) {
  cat('\n\n')
  message('Double-click on a file in a folder with LCMS files -> Choose all files in folder & subdirectories\n')
  cat('Only files containing the specified pattern (".mzML" as default) are selected.\n')
  cat('For other formats (or file naming patterns), specify e.g. pattern=".cdf" in the function call\n')
  cat('If you want to start picking from a folder where no LCMS files are present,\n')
  cat('you can put a dummy file ("dummy.txt" or similar) in that folder and double click on it.\n')
  cat('\n\n')
  if (missing(path)) path <- dirname(file.choose())

  files <- list.files(path, recursive = TRUE, full.names = TRUE)
  files <- grep(pattern, files, value = TRUE, fixed = TRUE)
  # Fail early with a readable message instead of a subscript error later on.
  if (length(files) == 0L) {
    stop("No files matching pattern '", pattern, "' found under '", path, "'.",
         call. = FALSE)
  }

  paths <- dirname(files)
  folders <- basename(paths)
  filenames <- basename(files)

  n_fields <- 6L
  fields <- lapply(strsplit(filenames, split = '_', fixed = TRUE), function(parts) {
    # Seven fields: the sample name itself contained one "_"; re-join it.
    if (length(parts) == n_fields + 1L) {
      parts <- c(parts[1:4], paste(parts[5:6], collapse = '-'), parts[7])
    }
    parts
  })

  # Always check every name against the expected field count, so a folder in
  # which all names are wrong in the same way is detected as well.
  keepers <- lengths(fields) == n_fields
  if (!all(keepers)) {
    warning('\nNon-uniform filename strategy!')
    if (complete) {
      warning('\nNon-conforming samples were omitted.')
    } else {
      warning('\nNon-conforming samples were kept in the output.')
      warning('\nUse getComplete() to filter out non-conforming samples.')
    }
  }

  # Seven parsed columns: the first five fields, then injection and fileformat.
  parsed <- matrix(NA_character_, nrow = length(files), ncol = n_fields + 1L)
  if (any(keepers)) {
    kept <- do.call(rbind, fields[keepers])
    last_field <- kept[, n_fields]
    # Split "injection.fileformat" at the LAST dot so that extra dots cannot
    # shift values between rows.
    has_dot <- grepl('.', last_field, fixed = TRUE)
    injection <- sub('\\.[^.]*$', '', last_field)
    fileformat <- ifelse(has_dot, sub('^.*\\.', '', last_field), NA_character_)
    parsed[keepers, ] <- cbind(kept[, -n_fields, drop = FALSE], injection, fileformat)
  }

  out <- as.data.frame(cbind(parsed, filenames, folders, paths), stringsAsFactors = FALSE)
  colnames(out) <- c('date', 'batch', 'chromatography', 'polarity', 'sample',
                     'injection', 'fileformat', 'filename', 'folder', 'path')
  out$injection <- as.numeric(out$injection)
  out$batch <- factor(out$batch)
  out$chromatography <- factor(out$chromatography)
  out$polarity <- factor(out$polarity)
  out$fullname <- files
  if (filesize) out$sizeMB <- file.info(files)$size / 1024^2
  if (complete) out <- getComplete(out)
  out <- out[order(out$chromatography, out$polarity, out$date, out$batch, out$injection), ]
  out
}
# Build the file table from the raw-data folder and keep a copy on disk.
LCMS_data <- getFilesMAX(files_location)
save(LCMS_data, file='/tmp/LCMS_data.rda')
#define groups#
# Label each file by its sample name: "ltQC" wins over "sQC", everything else
# is "sample". Logical matching is used on purpose: negative indexing with an
# empty grep() result selects nothing, which left every group as NA whenever
# no QC sample was present.
LCMS_data$group <- ifelse(
  grepl("ltQC", LCMS_data$sample), "ltQC",
  ifelse(grepl("sQC", LCMS_data$sample), "sQC", "sample")
)
LCMS_data_names <- LCMS_data %>% extractNames()
#LCMS_data_after_names <- LCMS_data_after %>% extractNames()
#---------------------------------create phenodata data.frame and load object---------------------------------------------------------------------------
# One row per input file: the file name without its ".mzML" suffix and the
# group label assigned above.
pd <- data.frame(
  sample_name = sub(".mzML", "", basename(LCMS_data_names), fixed = TRUE),
  sample_group = LCMS_data$group,
  stringsAsFactors = FALSE
)
# Read the files with pd attached as phenodata.
raw_data <- readMSData(
  files = LCMS_data_names,
  pdata = new("NAnnotatedDataFrame", pd),
  mode = "onDisk"
)
#----------------------------------visual data inspection---------------------------------------------------------------------------
# Extract the m/z values and group them by the file they came from.
mzs <- mz(raw_data)
mzs_by_file <- split(mzs, f = fromFile(raw_data))
length(mzs_by_file)
# Base peak chromatograms: chromatogram() with "max" as aggregation function.
bpis <- chromatogram(raw_data, aggregationFun = "max")
# Plot into a JPEG file, then close the graphics device again.
jpeg(filename = "/tmp/MAX_RP-NEG_bpc.jpg")
plot(bpis)
#plotbpc(bpis)
dev.off()
# Keep the chromatogram object for later use.
saveRDS(bpis, file = "/tmp/MAX_bpis_RP-NEG_bpc.rds")