# HW4
```_
# Overview of this part of the script:
#   1. read MLL_train.txt and drop the AFFX genes plus the two text columns,
#   2. min-max normalise every gene (row),
#   3. run a one-sided t-test per gene for each group vs. the other samples,
#   4. export the 10 smallest p-values per group,
#   5. fit SVMs on all genes and tabulate predictions for the same samples.
library(e1071)

MLL_train <- read.delim("MLL_train.txt")
rownames(MLL_train) <- MLL_train[, 1]

# Remove genes whose name includes "AFFX" and the first two columns (name and
# description). A logical mask is used instead of -grep(): when nothing
# matches, -integer(0) selects zero rows and would silently drop every gene.
is_affx <- grepl("AFFX", rownames(MLL_train))
MLL_train_data <- as.matrix(MLL_train[!is_affx, -c(1, 2)])

#' Min-max normalisation.
#'
#' @param x A numeric vector.
#' @return `x` rescaled linearly so that its minimum maps to 0 and its maximum
#'   to 1. A constant vector gives NaN (0 / 0).
nor <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}

samplename <- colnames(MLL_train_data)
MLL_train_data <- t(apply(MLL_train_data, 1, nor))
group_name <- c("ALL", "MLL", "AML")

# p-value matrix: one row per entry of group_name, one column per gene. It is
# sized from the data instead of a hard-coded gene count, so a different
# number of retained genes no longer breaks the loop or the colnames<- call.
n_genes <- nrow(MLL_train_data)
MLL_train_data_T_TEST <- matrix(0, nrow = length(group_name), ncol = n_genes)
# Not referenced again in the code visible here; kept so the workspace is
# unchanged.
top10COND <- matrix(0, nrow = 3, ncol = 10)

# Which samples belong to each group does not depend on the gene, so the
# masks are computed once instead of once per gene.
group_masks <- lapply(group_name, function(g) grepl(g, samplename))

# greater: one-sided test of the group's samples against all other samples.
for (j in seq_along(group_name)) {
  in_group <- group_masks[[j]]
  for (i in seq_len(n_genes)) {
    MLL_train_data_T_TEST[j, i] <- t.test(
      MLL_train_data[i, in_group],
      MLL_train_data[i, !in_group],
      alternative = "greater"
    )$p.value
  }
}
colnames(MLL_train_data_T_TEST) <- rownames(MLL_train_data)

# To access only the ranked gene names instead:
#   top10COND1_names <- order(MLL_train_data_T_TEST[1, ])[1:10]
#   x <- colnames(MLL_train_data_T_TEST)[top10COND1_names]

# The (up to) ten smallest p-values as a one-column matrix with the gene names
# as row names. head() avoids the NA rows that [1:10] produces when fewer than
# ten genes are available.
top10_pvalues <- function(p_values) {
  top <- as.matrix(head(sort(p_values), 10))
  colnames(top) <- "p-value"
  top
}

top10COND1 <- top10_pvalues(MLL_train_data_T_TEST[1, ])
top10COND2 <- top10_pvalues(MLL_train_data_T_TEST[2, ])
top10COND3 <- top10_pvalues(MLL_train_data_T_TEST[3, ])
write.csv(top10COND1, "condition1_top10.csv")
write.csv(top10COND2, "condition2_top10.csv")
write.csv(top10COND3, "condition3_top10.csv")

# SVM
# Xtest has one row per sample and one column per gene. Despite the name it
# holds the same samples the models are fitted on, so the tables below are
# confusion matrices on the fitting data.
Xtest <- t(MLL_train_data)
sample_ids <- rownames(Xtest)

# Labels are derived from each sample name's class prefix, in row order, so
# they stay aligned with Xtest without relying on fixed row ranges.
Ytrain <- as.factor(sub("^(ALL|MLL|AML).*$", "\\1", sample_ids))
the_3_classes <- svm(Xtest, Ytrain)
summary(the_3_classes)
pred <- predict(the_3_classes, Xtest)
ans <- table(pred, Ytrain)
ans
write.csv(ans, "3CLASSES.csv")

## Three 2-class classification problems, each case using all genes
# ALL vs Other
Ytrain_ALL_vs_Other <- as.factor(
  ifelse(grepl("^ALL", sample_ids), "ALL", "MLL+AML")
)
# ALL vs Other, all genes: fit, then tabulate predictions for the same samples.
ALL_vs_Other_SVM <- svm(Xtest, Ytrain_ALL_vs_Other)
summary(ALL_vs_Other_SVM)
pred_ALL_vs_Other_SVM <- predict(ALL_vs_Other_SVM, Xtest)
ans <- table(pred_ALL_vs_Other_SVM, Ytrain_ALL_vs_Other)
ans
write.csv(ans, "ALL_vs_MLL+AML.csv")

# MLL vs Other
# The label vector must follow the row order of Xtest. Building it as
# c(<MLL rows>, <all other rows>) puts the MLL labels first although the MLL
# samples are not the first rows, which pairs samples with the wrong class.
# Labelling each row from its own name keeps labels and rows aligned.
Ytrain_MLL_vs_Other <- as.factor(
  ifelse(grepl("^MLL", rownames(Xtest)), "MLL", "ALL+AML")
)
MLL_vs_Other_SVM <- svm(Xtest, Ytrain_MLL_vs_Other)
summary(MLL_vs_Other_SVM)
pred_MLL_vs_Other_SVM <- predict(MLL_vs_Other_SVM, Xtest)
ans <- table(pred_MLL_vs_Other_SVM, Ytrain_MLL_vs_Other)
ans
write.csv(ans, "MLL_vs_ALL+AML.csv")

# AML vs Other
# Same alignment fix: label every row of Xtest in place.
Ytrain_AML_vs_Other <- as.factor(
  ifelse(grepl("^AML", rownames(Xtest)), "AML", "ALL+MLL")
)
AML_vs_Other_SVM <- svm(Xtest, Ytrain_AML_vs_Other)
summary(AML_vs_Other_SVM)
pred_AML_vs_Other_SVM <- predict(AML_vs_Other_SVM, Xtest)
ans <- table(pred_AML_vs_Other_SVM, Ytrain_AML_vs_Other)
ans
write.csv(ans, "AML_vs_ALL+MLL.csv")

# Three 2-class classification problems, each case using 10 selected genes
# ALL vs MLL+AML
# top10COND1: keep only the genes listed in top10COND1 (samples in rows).
# drop = FALSE keeps a matrix even if a single gene is selected.
Xtest_con1top10 <- t(MLL_train_data[rownames(top10COND1), , drop = FALSE])
Ytrain_ALLtop10 <- as.factor(
  ifelse(grepl("^ALL", rownames(Xtest_con1top10)), "ALL", "MLL+AML")
)
ALLtop10_vs_Other_SVM <- svm(Xtest_con1top10, Ytrain_ALLtop10)
summary(ALLtop10_vs_Other_SVM)
pred_ALLtop10_vs_Other_SVM <- predict(ALLtop10_vs_Other_SVM, Xtest_con1top10)
ans <- table(pred_ALLtop10_vs_Other_SVM, Ytrain_ALLtop10)
ans
write.csv(ans, "ALLtop10_vs_MLL+AML.csv")

# MLL vs ALL+AML
# top10COND2
Xtest_con2top10 <- t(MLL_train_data[rownames(top10COND2), , drop = FALSE])
# Labels in row order (see the note on Ytrain_MLL_vs_Other above).
Ytrain_MLLtop10 <- as.factor(
  ifelse(grepl("^MLL", rownames(Xtest_con2top10)), "MLL", "ALL+AML")
)
MLLtop10_vs_Other_SVM <- svm(Xtest_con2top10, Ytrain_MLLtop10)
# MLL vs ALL+AML, top-10 genes: report the model and tabulate its predictions
# for the samples it was fitted on.
summary(MLLtop10_vs_Other_SVM)
pred_MLLtop10_vs_Other_SVM <- predict(MLLtop10_vs_Other_SVM, Xtest_con2top10)
ans <- table(pred_MLLtop10_vs_Other_SVM, Ytrain_MLLtop10)
ans
write.csv(ans, "MLLtop10_vs_ALL+AML.csv")

# AML vs ALL+MLL
# top10COND3: keep only the genes listed in top10COND3 (samples in rows).
# drop = FALSE keeps a matrix even if a single gene is selected.
Xtest_con3top10 <- t(MLL_train_data[rownames(top10COND3), , drop = FALSE])
# The label vector must follow the row order of the feature matrix. Building
# it as c(<AML rows>, <all other rows>) puts the AML labels first although the
# AML samples are the last rows, which pairs samples with the wrong class.
# Labelling each row from its own name keeps labels and rows aligned.
Ytrain_AMLtop10 <- as.factor(
  ifelse(grepl("^AML", rownames(Xtest_con3top10)), "AML", "ALL+MLL")
)
AMLtop10_vs_Other_SVM <- svm(Xtest_con3top10, Ytrain_AMLtop10)
summary(AMLtop10_vs_Other_SVM)
pred_AMLtop10_vs_Other_SVM <- predict(AMLtop10_vs_Other_SVM, Xtest_con3top10)
ans <- table(pred_AMLtop10_vs_Other_SVM, Ytrain_AMLtop10)
ans
write.csv(ans, "AMLtop10_vs_ALL+MLL.csv")

## A 3-class classification problem using the combination of 30 selected genes
# Column-bind the three 10-gene matrices; all share the same sample rows.
Xtest_30gene <- cbind(Xtest_con1top10, Xtest_con2top10, Xtest_con3top10)
# Class label = the sample name's class prefix, taken row by row so no fixed
# row ranges are needed.
Ytrain_30gene <- as.factor(
  sub("^(ALL|MLL|AML).*$", "\\1", rownames(Xtest_30gene))
)
genes30_SVM <- svm(Xtest_30gene, Ytrain_30gene)
summary(genes30_SVM)
pred_30gene <- predict(genes30_SVM, Xtest_30gene)
ans <- table(pred_30gene, Ytrain_30gene)
ans
write.csv(ans, "gene30_SVM.csv")
```