---
tags: Source
---
# 4.1. BatchCorr.R
library(dplyr)
library(devtools)
library(CMSITools)
library(batchCorr)
library(ranger)
library(devtools)
library(doParallel)
library(BiocParallel)
library(MSnbase)
library(xcms)
library(tidyverse)
library(stringi)
register(bpstart(SnowParam(12)))
source('/tmp/src/getFilesMAX.R')
args <- commandArgs(trailingOnly = TRUE)
filename <- args[1]
filepath <- args[2]
filepath <- file.path(filepath, filename)
PT_NOFill <- readRDS("/tmp/data/rp/xcms/xcms_fill/xcms_POS_PT_NOFill.rds")
PT_fill_imputation <- readRDS("/tmp/data/rp/xcms/xcms_fill/xcms_POS_IMPUTED.rds")
#----load samples and define groups----
files_location <- "/data/"
LCMS_data <- getFilesMAX(files_location)
LCMS_data$group <- rep(NA,nrow(LCMS_data))
LCMS_data$group[LCMS_data$sample %>% grep("sQC",.)] <- "sQC"
LCMS_data$group[LCMS_data$sample %>% grep("ltQC",.)] <- "ltQC"
LCMS_data$group[-c(LCMS_data$sample %>% grep("sQC",.), LCMS_data$sample %>% grep("ltQC",.))] <- "sample"
LCMS_data_names <- LCMS_data %>% extractNames()
#---perform batch alignment---
# Extract peakinfo (i.e. m/z and rt of features)
peakIn <- peakInfo(PT = PT_NOFill, sep = '@', start = 3) # These column names have 2 leading characters describing LC-MS mode -> start at 3
# Perform multi-batch alignment
alignBat <- alignBatches(peakInfo = peakIn, PeakTabNoFill = PT_NOFill, PeakTabFilled = PT_fill_imputation, batches = LCMS_data$batch, sampleGroups = LCMS_data$group, selectGroup = 'sQC') #testar om den kan göra en ajignment som är bättre
# --- correct drifts ---
batch_num <- 0
correctedBatches <- list()
# batch code as in "B4W5"
for (batch_code in as.character(unique(LCMS_data$batch))){
batch_num <- batch_num + 1
print(paste("Drift correction for batch", batch_num, ": ", batch_code))
selected_batch <- getBatch(peakTable = PT_fill_imputation, meta = LCMS_data, batch = LCMS_data$batch, select = batch_code)
# TODO: modelNames and G selection is the core optimization, see tutorial
drift_correction <- correctDrift(peakTable = selected_batch$peakTable, injections = selected_batch$meta$injection, sampleGroups =
selected_batch$meta$group, QCID = "sQC", G = seq(5,35, by = 3) )
correctedBatches[[batch_num]] <- drift_correction
}
save(correctedBatches, file=paste(filepath, "_corrected.rda", sep = ""))
peakTdata <- mergeBatches(correctedBatches)
# --- normalize samples ---
PT_data_norm <- normalizeBatches(peakTableCorr = peakTdata$peakTableCorr, peakTableOrg = peakTdata$peakTableOrg, batches=LCMS_data$batch, sampleGroup = LCMS_data$group, refGroup = 'sQC', population='sample')
sum(is.na(PT_data_norm$peakTable))
sum(is.nan(PT_data_norm$peakTable))
print(paste("is.na: ", sum(is.na(PT_data_norm$peakTable))))
print(paste("is.nan: ", sum(is.nan(PT_data_norm$peakTable))))
save(LCMS_data, PT_data_norm, file=paste(filepath, "_batchcorr_final.rda", sep = ""))