---
tags: Source
---
# 5.1. Clustering.R
# TEAM: this script was done for RP
library(devtools)
library(RAMClustR)
library(xcms)
load("/tmp/data/rp/bcorr/MAX_POS_batchcorr_final.rda")
peakTab <- PT_data_norm$peakTable
write.csv(peakTab, file="peakTable.csv")
RNdata <- readRDS(file="/tmp/data/rp/xcms/xcms_fill/xcms_POS_CORR.rds")
# stRP=round(median(featRP$rtmax-featRP$rtmin)/2, digits = 2)
# stRN=round(median(featRN$rtmax-featRN$rtmin)/2, digits = 2)
plotClust=function(ram,clustnr,xcmsData,samps,dtime=5,dmz=.05) {
if(missing(samps)) {
nSamp=nrow(ram$SpecAbund)
samps=1:nSamp
} else nSamp=length(samps)
whichFeats=which(ram$featclus==clustnr)
peakMeta=cbind(ram$fmz,ram$frt)
pkMetaGrp=peakMeta[whichFeats,]
rtr=ram$clrt[clustnr]+c(-dtime,dtime)
rtr[rtr<0]=0
mzr=cbind(ram$fmz[whichFeats]-dmz,ram$fmz[whichFeats]+dmz)
chr <- chromatogram(xcmsData, mz = mzr, rt = rtr)
plot(0:1,0:1,type='n',axes=F,xlab='Retention time (s)', ylab='Intensity (AU)',main=paste0('RAM cluster ',clustnr,'; RT ',signif(ram$clrt[clustnr],5),'s'))
box(bty='l')
for (pk in 1:length(whichFeats)) {
rts=ints=list()
for (samp in 1:nSamp) {
rts[[samp]]=chr[pk,samps[samp]]@rtime
ints[[samp]]=chr[pk,samps[samp]]@intensity
}
nrts=min(sapply(rts,length))
rts=sapply(rts,function(x) x[1:nrts])
rts=rowMeans(rts)
ints=sapply(ints,function(x) x[1:nrts])
ints=rowMeans(ints,na.rm=T)
par(new=T)
plot(rts,ints,type='l',col=pk+1,ylim=c(0,max(ints,na.rm=T)),axes=F,xlab='',ylab='')
}
axis(1)
legend('topright',legend = paste0('F',whichFeats,'@mz',signif(pkMetaGrp[,1],5)), lty=1,col=(1:length(whichFeats))+1,bty='n')
}
expDes=defineExperiment(force.skip = T)
sr=c(.3,.4,.5) # styr hur tar ut features
st=c(.5,1,1.5,2) # styr hur tar ut features
maxt=c(5)
par=expand.grid(st=st,sr=sr,maxt=maxt)
nClust=nSing=sizeMax=sizeMed=sizeMean=numeric(nrow(par))
nFeat=list()
samps=sample(1:nrow(PT_data_norm$peakTable),20)
register(bpstart(SnowParam(7))) # Choose as many cores as you can/want
for (i in 1:nrow(par)) {
RRP=ramclustR(ms='peakTable.csv', st = par$st[i], sr=par$sr[i], maxt = par$maxt[i], timepos = 2, sampNameCol = 1, featdelim = '@', ExpDes = expDes)
nClust[i]=length(RRP$cmpd)
nSing[i]=RRP$nsing
sizeMax[i]=max(RRP$nfeat)
sizeMed[i]=median(RRP$nfeat)
sizeMean[i]=mean(RRP$nfeat)
nFeat[[i]]=RRP$nfeat
pdf(file=paste0('clusts_par',i,'.pdf'),width=15,height=8)
par(mfrow=c(4,5),mar=c(4,4,2,0)+.5)
clusts=round(c(2:6,seq(7,max(RRP$featclus),length.out = 15)))
for (c in clusts) {
plotClust(ram = RRP,clustnr = c,xcmsData = RNdata,samps = samps)
}
dev.off()
}
bpstop()
cbind(par,nClust,nSing,sizeMax,sizeMean,sizeMed)