Dra. E. Ernestina Godoy Lozano
elizabeth.godoy@insp.mx
tinagodoy@gmail.com
ssh -X alumnoX@132.248.32.180
Ir al directorio EMetagen
cd EMetagen
Descomprimir modulo de trabajo
tar -xzvf /Share/EMetagen/16S_analisis.tar.gz
Abrir un navegador web
Ir a la siguiente direccion (link/liga):
http://132.248.32.180:8787
Ingresar usuario (Username) y contraseña (Password)
¡Perfecto! Haz ingresado al R Studio Server!!!
getwd()
setwd("~/EMetagen/16S_analisis")
dir()
Usaremos la función library()
library(gplots)
library(ggplot2)
library(vegan)
library(reshape2)
library(metagenomeSeq)
library(RColorBrewer)
library(phyloseq)
library(MASS)
library(gtools)
Usaremos la función source()
source("scripts/pairwise.adonis.R")
source("scripts/abund_relativa.R")
source("scripts/taxonomic_levels.R")
Diapositivas: 15-17
matrix_abund <- read.delim("data/TablaFinal.txt", header=TRUE)
View(matrix_abund)
dim(matrix_abund)
tabla <- as.matrix(data.frame(matrix_abund[,2:16]))
rownames(tabla) <- matrix_abund$taxonomy
View(tabla)
exportMat(tabla, file ="data/integrated_matrix.txt")
asignacion_taxonomica <- data.frame(completo=rownames(tabla))
View(asignacion_taxonomica)
asignacion_taxonomica$Domain <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][1]})
asignacion_taxonomica$Phylum <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][2]})
asignacion_taxonomica$Class <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][3]})
asignacion_taxonomica$Order <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][4]})
asignacion_taxonomica$Family <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][5]})
asignacion_taxonomica$Genus <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][6]})
asignacion_taxonomica$Species <- sapply(as.character(asignacion_taxonomica$completo), function(x){strsplit(x, ";")[[1]][7]})
View(asignacion_taxonomica)
Usaremos la funcion taxa_levels() que cargamos previamente
taxonomic_levels("data/TablaFinal.txt", "data")
dir("data")
Diapositiva 18
View(tabla)
totales <- colSums(tabla)
totales
tabla_rel <- matrix(nrow=dim(tabla)[1], ncol=dim(tabla)[2])
colnames(tabla_rel) <- colnames(tabla)
rownames(tabla_rel) <- rownames(tabla)
for (i in 1:dim(tabla)[1]){
for (j in 1:dim(tabla)[2]){
tabla_rel[i,j] <- (tabla[i,j]/totales[j])*100
}
}
View(tabla_rel)
colSums(tabla_rel)
exportMat(tabla_rel, file ="data/percent_integrated_matrix.txt")
abund_relativa("data/Genus.txt", "data/percent_Genus.txt")
abund_relativa("data/Class.txt", "data/percent_Class.txt")
dir("data")
Diapositivas: 19- 21
tabla <- read.delim("data/percent_Class.txt", header=T, row.names=1)
View(tabla)
dim(tabla)
maxab <- apply(tabla, 1, mean)
length(maxab)
head(maxab)
cut <- 1
abundancias <- maxab[which(maxab > cut)]
Número de taxas con los que nos quedaremos
length(abundancias)
subtable <- tabla[which(rownames(tabla) %in% names(abundancias)),]
dim(subtable)
tabla_taxas <- data.frame(abundancias)
tabla_taxas$taxa <- names(abundancias)
df_ordenado <- tabla_taxas[order(tabla_taxas$abundancias, decreasing =T ),]
orden <- df_ordenado$taxa
new <- matrix(nrow=dim(subtable)[1], ncol=dim(subtable)[2])
colnames(new) <- colnames(subtable)
rownames(new) <- orden
Llenar la nueva matrix de acuerdo al orden
for (i in 1:dim(new)[2]){
numCol <- which(colnames(tabla)==colnames(new)[i])
for (j in 1:dim(new)[1]){
numRow <- which(rownames(tabla)==rownames(new)[j])
new[j,i] <- tabla[numRow, numCol]
}
}
View(new)
totales <- colSums(new)
subtable2 <- as.data.frame(matrix(ncol=dim(new)[2], nrow=1))
colnames(subtable2) <- colnames(new)
rownames(subtable2) <- paste("Abundance <", cut, "%", sep="")
for (i in 1:dim(subtable2)[2]){
subtable2[1,i] <- 100-totales[i]
}
new <- rbind(new, subtable2)
exportMat(as.matrix(new), file ="data/percent_Class_more_1_perc.txt")
melt_subtable <- melt(new)
Añadir los nombres de los taxa en una columna extra
melt_subtable$Taxa <- as.character(rep(rownames(new), dim(new)[2]))
Ordene los nombres de los taxa de acuerdo al orden establecido
melt_subtable$Taxa <- factor(melt_subtable$Taxa, levels=rownames(new))
colnames(melt_subtable) <- c("Sample","Freq", "Taxa")
head(melt_subtable)
color = grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
Generar un vector de colores al azar con un color gris para los taxa menos abundantes
my_color=c(sample(color, dim(new)[1]-1, replace=FALSE), "gray")
ggplot(melt_subtable, aes(x= Sample, y=Freq, fill=Taxa)) +
geom_bar(stat="identity") +
scale_fill_manual(values=my_color, limits=rownames(new)) +
theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1, size=6),
panel.background= element_rect(fill = NA, colour = "black"),
panel.grid.major = element_line(colour = "grey90"),
legend.key = element_rect(fill = "white"),
legend.text=element_text(size=6),
legend.position="bottom") +
labs(x = "", y = "Relative abundance (%)", fill="") +
scale_y_continuous(breaks=c(0, 20, 40, 60, 80, 100))
Guardar imagen con función ggsave()
ggsave("figuras/sock_plots_Class_more_1_perc.png",
device = "png", dpi = 300, units = "px", bg = "white")
Diapositivas: 23-29
library(qiime2R)
library(tidyverse)
tabla_qza <- read_qza("qiime2/table.qza")
str(tabla_qza)
class(tabla_qza)
names(tabla_qza)
View(tabla_qza$data)
head(tabla_qza$data)
taxonomia_qza <- read_qza("qiime2/taxonomy_blast.qza")
str(taxonomia_qza)
class(taxonomia_qza)
View(taxonomia_qza$data)
taxa_sep <- parse_taxonomy(taxonomia_qza$data)
View(taxa_sep)
class_table <- summarize_taxa(tabla_qza$data, taxa_sep)$Class
View(class_table)
metadata <- read_q2metadata("qiime2/metadata.tsv")
View(metadata)
taxa_barplot(class_table, metadata,"Zone")
taxa_heatmap(class_table, metadata,"Type")
taxa_barplot(class_table, metadata,"Zone") +
scale_fill_manual(values=my_color) +
theme(text = element_text(size=10), legend.position="bottom") +
scale_y_continuous(breaks=c(0, 20, 40, 60, 80, 100)) +
labs(fill="")
taxa_heatmap(class_table, metadata,"Type") +
scale_fill_gradientn(colours = terrain.colors(10))
taxa_heatmap(class_table, metadata,"Type") +
scale_fill_gradient(low = "purple", high = "green3", na.value = NA)
png("figuras/taxa_heatmap.png", width = 10*300, height = 8*300, res = 300, pointsize = 8)
taxa_heatmap(class_table, metadata,"Type")
dev.off()
physeq_qiime2 <- qza_to_phyloseq (
features="qiime2/table.qza",
tree="qiime2/rooted-tree.qza",
taxonomy="qiime2/taxonomy_blast.qza",
metadata = "qiime2/metadata.tsv")
physeq_qiime2
class(physeq_qiime2)
str(physeq_qiime2)
Diapositivas: 30-34
variables <- read.delim("data/abiotic_variables.txt", header=T, row.names=1)
abun_table <- read.delim("data/Genus.txt", header=T, row.names=1)
abun_table_mat <- as.matrix(abun_table)
dim(abun_table)
View(abun_table)
taxrank_mat <- as.matrix(rownames(abun_table_mat))
colnames(taxrank_mat) <- ("TAX")
rownames(taxrank_mat) <- rownames(abun_table_mat)
head(taxrank_mat)
OTU <- otu_table(abun_table_mat, taxa_are_rows=TRUE)
TAX <- tax_table(taxrank_mat)
MET <- sample_data(variables)
physeq <- phyloseq(OTU, TAX, MET)
str(physeq)
diversity_index <- data.frame(round(estimate_richness(physeq, measures=c("Observed", "Chao1", "Shannon", "Simpson")), 2))
diversity_index
Usar el objeto que construimos con qiime2R
diversity_index2 <- data.frame(round(estimate_richness(physeq_qiime2, measures=c("Observed", "Chao1", "Shannon", "Simpson")), 2))
diversity_index2
write.table(diversity_index, "data/alpha_diversity_index.txt", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
diversity_index$Clase <- sapply(as.character(rownames(diversity_index)),
function(x){strsplit(x, "_")[[1]][1]})
ggplot(diversity_index, aes(x=Clase, y=Shannon, fill=Clase)) +
geom_boxplot(show.legend=FALSE) +
geom_point(color="black", alpha=0.9, position = "jitter", shape=21, show.legend=FALSE) +
labs(x = "", y = "Indice de Shannon", fill="") +
scale_fill_manual(values=c("royalblue2", "tomato2", "seagreen3")) +
theme_minimal()
ggsave("figuras/boxplot_shannon.png", device = "png", dpi = 300, units = "px", bg = "white")
Diapositivas: 35-38
ordination <- ordinate(physeq, "PCoA", "bray")
plot_ordination(physeq, ordination, color="grupo")
Podemos usar la información de alguna variable continua
plot_ordination(physeq, ordination, color="Deep")
Cambiarle el color a otra paleta de colores
plot_ordination(physeq, ordination, color="Deep") +
scale_colour_gradientn(colours= topo.colors(10))
Vamos a usar la sintáxis de ggplot2 para mejorar las gráficas
plot_ordination(physeq, ordination, color="grupo") +
stat_ellipse() +
scale_color_manual(values= c("deeppink1", "darkturquoise", "gold")) +
labs(title="PCoA") +
theme_bw()
plot_ordination(physeq_qiime2, ordinate(physeq_qiime2, "PCoA", "wunifrac"), color="Type")
Guardar nuestro gráfico
plot_ordination(physeq, ordination, color="grupo") + stat_ellipse() +
scale_color_manual(values= c("deeppink1", "darkturquoise", "gold")) +
labs(title="PCoA") + theme_bw()
ggsave("figuras/PCoA_bray_points_phyloseq.png", device = "png", dpi = 300, units = "px")
plot_ordination(physeq, ordinate(physeq, "NMDS", "bray"), color="grupo") +
scale_color_manual(values= c("deeppink1", "darkturquoise", "gold")) +
labs(title="NMDS") + theme_bw()
ggsave("figuras/NMDS_bray_points_phyloseq.png", device = "png", dpi = 300, units = "px")
Diapositivas: 38-39
datos <- loadMeta("data/Genus.txt")
MRexp <- newMRexperiment(datos$counts)
p <- cumNormStatFast(MRexp)
normalized_matrix <- cumNormMat(MRexp, p = p)
head(normalized_matrix)
exportMat(normalized_matrix, file ="data/normal_Genus.txt")
transp_mat <- t(normalized_matrix)
muestras <- data.frame(estacion=rownames(transp_mat))
muestras$clase <- sapply(as.character(muestras$estacion),
function(x){strsplit(x, "_")[[1]][1]})
grupos <- muestras$clase
str(transp_mat)
resultado_anosim <- anosim(transp_mat,
grouping= grupos,
permutations = 999,
distance = "bray")
resultado_anosim
pairwise.adonis(x= transp_mat, grupos,
sim.function = 'vegdist',
sim.method='bray',
p.adjust.m='bonferroni')
ls()
sessionInfo()
Gamma de Colores en R
Colores HTML
Paleta de colores Opción 1
Paleta de colores Opción 2
Paleta de colores Opción 3
metagenomeSeq
phyloseq
qiime2R
UUASMB
Bitácoras
Metagenómica