---
tags: Source
---
# PeakPicking_post_BW.R
library(xcms)
library(CMSITools)
library(tidyverse)
library(stringi)
library(StatTools)
register(bpstart(MulticoreParam(12)))
source('/tmp/src/getFilesMAX.R')
args <- commandArgs(trailingOnly = TRUE)
filename <- args[1]
filepath <- args[2]
filepath <- file.path(filepath, filename)
# inserting the file location here instead as getFilesMax
files_location <- "/data/"
LCMS_data <- getFilesMAX(files_location)
save(LCMS_data, file=paste(filepath, ".rda", sep = ""))
#----------------------------------define groups-------------------------------------------------------------------------
LCMS_data$group <- rep(NA,nrow(LCMS_data))
LCMS_data$group[LCMS_data$sample %>% grep("sQC",.)] <- "sQC"
LCMS_data$group[LCMS_data$sample %>% grep("ltQC",.)] <- "ltQC"
LCMS_data$group[-c(LCMS_data$sample %>% grep("sQC",.), LCMS_data$sample %>% grep("ltQC",.))] <- "sample"
LCMS_data_names <- LCMS_data %>% extractNames()
#--------------------------------Alignment--------------------------------------------------------------------------
xdata <- readRDS(file="/tmp/data/rp/xcms/algn/algn_bw2.4_algn.rds")
#--------------------------------Corresponding--------------------------------------------------------------------------
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
minFraction = 0.95, bw = 2.4, binSize = 0.924)
xdata <- groupChromPeaks(xdata, param = pdp)
#saveRDS(xdata, file = "/tmp/LCMS_data_CORR.rds")
saveRDS(xdata, file=paste(filepath, "_CORR.rds", sep = ""))
#-------------------------------Fill in---------------------------------------------------------------------------------
#---------------------------------------PT_noFill--------------------------------------
getTable <- function(XObj) {
table <- featureValues(XObj, value = "into")
cat('\nConversion of',deparse(substitute(XObj)),'into peak table')
cat('\n',ncol(table),'samples (rows)')
cat('\n',nrow(table),'features (columns)')
perc=paste0('(',signif(100*sum(is.na(table))/prod(dim(table)),3),'% of data)')
cat('\n',sum(is.na(table)),'missing values',perc)
NASamp <- apply(table,2,function(x) sum(is.na(x)))
NASamp <- NASamp / nrow(table)
NAFeat <- apply(table,1,function(x) sum(is.na(x)))
NAFeat <- NAFeat / ncol(table)
par(mfrow=c(1,2))
hist(NASamp,main='Missingness per sample',xlab='Proportion')
hist(NAFeat,main='Missingness per feature',xlab='Proportion')
par(mfrow=c(1,1))
featInfo <- featureDefinitions(XObj)
featInfo <- paste(featInfo$mzmed,featInfo$rtmed,sep='@')
rownames(table) <- featInfo
table <- t(table)
}
PT_NOFill <- getTable(xdata)
write.csv(PT_NOFill, file=paste(filepath, "_PT_NOFill.csv", sep = ""))
saveRDS(PT_NOFill, file=paste(filepath, "_PT_NOFill.rds", sep = ""))
#-----------------------------------------------Peak_fill---------------------------------------------
# ppm from IPO PP average
fpp <- FillChromPeaksParam(ppm=20.68, fixedRt = 2, expandMz = 0.1)
xdata <- fillChromPeaks(xdata, param = fpp, BPPARAM = SerialParam())
#saveRDS(xdata, "/tmp/LCMS_data_FILLED.rds")
saveRDS(xdata, file=paste(filepath, "_FILLED.rds", sep = ""))
Missing_beforeFill<- apply(featureValues(xdata, filled = FALSE), MARGIN = 2,
FUN = function(z) sum(is.na(z)))
Missing_afterFill<- apply(featureValues(xdata), MARGIN = 2,
FUN = function(z) sum(is.na(z)))
Miss <- cbind.data.frame(Missing_beforeFill, Missing_afterFill)
print("Missingnes:")
print(Miss)
#--------------------------------Imputation-----------------------------------------------------------------------------
PT_fill <- getTable(xdata)
PT_fill_imputation <- mvImpWrap(MAT = PT_fill , method = "RF", rfMeth = "ranger", guess = "minVar", n2 = 50)
write.csv(PT_fill_imputation, file = paste(filepath, "_IMPUTED.csv", sep = ""))
saveRDS(PT_fill_imputation, file = paste(filepath, "_IMPUTED.rds", sep = ""))