---
tags: Source
---
# ExtractClusters.R
# TEAM: was done for RP data
# save the RamClust object
library(RAMClustR)
library(xcms)
library(tidyverse)
register(bpstart(SnowParam(12)))
#-----------------------------------Ramclust and do.findmain--------------------------------
RP <- read.csv("/tmp/data/rp/clust/peakTable.csv", row.names = 1, check.names = FALSE, stringsAsFactors = FALSE)
head(colnames(RP))
head(rownames(RP))
expDes <- defineExperiment(force.skip = T)
RP_ramclust_obj <- ramclustR(ms='peakTable.csv', st = 1.25, sr=0.4, maxt = 5, timepos = 2, sampNameCol = 1, featdelim = '@', ExpDes = expDes)
saveRDS(RP_ramclust_obj, file = "RP_ramclust_obj.rds")
RC1_Findmain <- do.findmain(RP_ramclust_obj, mode = "positive", mzabs.error = 0.01, ppm.error = 10,
ads = c("[M-H]-","[M-2H]2-","[M-2H+Na]-", "[M-H+Cl]2-","[M-2H+K]-","[M+Cl]-",
"[2M-H]-","[2M-2H]2-", "[2M-2H+Na]-", "[2M-H+Cl]2-", "[2M-2H+K]-",
"[2M+Cl]-"),
nls = c("[M-H-H2O]-"))
saveRDS(RC1_Findmain, file = "RC1_Findmain.rds")
#------------------------------------------------------------------------------------------
my_doFindMain <- RC1_Findmain
#Extract feature clusters
my_ramclusters <- my_doFindMain$M.ann
#Assign cluster@Rt to cluster name
names(my_ramclusters) <- paste0("RP", paste0(my_doFindMain$cmpd, "@"),my_doFindMain$clrt %>% round(.,2))
#Extract the features with the highest intensity for each cluster (cluster representative)
Max_int<- lapply(my_ramclusters, function(x) x[which.max(x$int), ])
names(Max_int) <- names(my_ramclusters)
#Get the name of these features
name_of_max_int_feature <- c()
for (i in 1:length(my_ramclusters)) {
position_of_max_int<- as.integer(rownames(my_ramclusters[[i]])[my_ramclusters[[i]]$int %>% which.max()])#Locate the cluster representative int within each cluster
features_per_cluster<- my_doFindMain$labels[which(my_doFindMain$featclus==i)]#Retrieve the original feature names within each cluster
name_of_max_int_feature <- c(name_of_max_int_feature,features_per_cluster[position_of_max_int])# Combine the feature names
}
name_of_max_int_feature
name_of_max_int_feature <- sub("_","@",name_of_max_int_feature)
name_of_max_int_feature
#Extract the singleton names
singleton_names <- my_doFindMain$labels[which(my_doFindMain$featclus==0)]
singleton_names
singleton_names <- sub("_","@", singleton_names)
singleton_names
#The final output of Ramclust: combine the cluster representatves and singletons
features_extracted_ram <- c(name_of_max_int_feature,singleton_names)
head(features_extracted_ram)
#Find the features in original peaktable and extract the intensity
Ramclust_in_origPT <- RP[,colnames(RP)%in%features_extracted_ram]#Refine the origial PT based on ramclust output
cluster_name_postion_in_origPT<- which(colnames(Ramclust_in_origPT)%in%name_of_max_int_feature)#Locate which features are cluster representatives
#Re-arrange the cluster names based on the cluster representative position in the final PT
cluster_names_in_origPT<- colnames(Ramclust_in_origPT)[cluster_name_postion_in_origPT]
rearranged_cluster_names<- names(my_ramclusters)[match(cluster_names_in_origPT,name_of_max_int_feature)]
#Prepare the final PT
RP_ramclust_annotated_PT<- Ramclust_in_origPT
#Replace the cluster representative names with cluster name
colnames(RP_ramclust_annotated_PT)[cluster_name_postion_in_origPT] <- rearranged_cluster_names
#Extract the singleton names
singletons_PTnames<- colnames(RP_ramclust_annotated_PT)[-cluster_name_postion_in_origPT]
##Split mz and rt values
list_mz_rt<- singletons_PTnames %>% strsplit(.,"@")#change to your feature name linking mark (e.g. @)
##Round the m/z and rt values for singletons
mz_names<- unlist(lapply(list_mz_rt, "[[", 1)) %>% as.numeric()%>% round(.,4)
rt_names<- unlist(lapply(list_mz_rt, "[[", 2)) %>% as.numeric()%>% round(.,2)
##Replace the names of singletons
singletons_final_PTnames<- paste(paste0("RP",mz_names),rt_names, sep = "@")# change to the mode RP or RN
## check the names
singletons_final_PTnames[1:5]
singletons_PTnames[1:5]
colnames(RP_ramclust_annotated_PT)[-cluster_name_postion_in_origPT] <- singletons_final_PTnames
#Double-check
random_nums <- sample(ncol(RP_ramclust_annotated_PT),10)
colnames(Ramclust_in_origPT)[random_nums]
colnames(RP_ramclust_annotated_PT)[random_nums]
#Export feature table
write.csv(RP_ramclust_annotated_PT, file = "MAX_RP_metabolomics_final.csv")
#----------------------------------Prepare feature cluster meta data-----------------------
feature_cluster_meta_all <- list()
for (i in 1: length(my_ramclusters)) {
psuedo = my_ramclusters[[i]]
cluster_names = names(my_ramclusters)[i]
feature_cluster_meta_all[[i]] = cbind.data.frame(cluster_names=rep(cluster_names, nrow(psuedo)), psuedo)
}
feature_cluster_meta_all_final <- do.call(rbind.data.frame, feature_cluster_meta_all)
rownames(feature_cluster_meta_all_final) <- NULL
feature_cluster_meta_sim_final <- feature_cluster_meta_all_final[,-c(7,8,9)]
write.csv(feature_cluster_meta_all_final, file = "MAX_RP_featureCluster_meta_all.csv")
write.csv(feature_cluster_meta_sim_final, file = "MAX_RP_featureCluster_meta_simple.csv")
#------------------Prepare the max_int_cluster_meta--------------------------------------
max_int_cluster_meta<- do.call(rbind.data.frame, Max_int)
max_int_cluster_meta_final <- max_int_cluster_meta[,1,drop = FALSE]
write.csv(max_int_cluster_meta_final, file = "MAX_RP_cluster_representatives_meta.csv")