--- tags: GeneLab --- # RR6, GLDS-249, V4 amplicon data [toc] # Processing Data available at [GLDS-249](https://genelab-data.ndc.nasa.gov/genelab/accession/GLDS-249/) (these samples are labeled FluidAA). Processed with [this Snakefile](https://github.com/AstrobioMike/mock-temp/blob/master/Amplicon/Illumina/Snakefile) that calls [this RScript](https://github.com/AstrobioMike/mock-temp/blob/master/Amplicon/Illumina/full-R-processing.R). ## Setup R env ```r= library("phyloseq") library("vegan") library("DESeq2") library("ggplot2") library("dendextend") library("tidyr") library("viridis") library("reshape") library("tidyverse") ## largely following along with my page here: https://astrobiomike.github.io/amplicon/dada2_workflow_ex ## see there for more discussion on anything ## reading in data files count_tab <- read.table("../Final_Outputs/counts.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t") tax_tab <- read.table("../Final_Outputs/taxonomy.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t", na.strings="NA") sample_info_tab <- read.table("sample-info.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t", comment.char="") ## shortening sample names to remove any redundant info names(count_tab) <- names(count_tab) %>% str_replace("_FluidAA_Rep.?", "") %>% str_replace("[A-Z]*_", "") row.names(sample_info_tab) <- row.names(sample_info_tab) %>% str_replace("_FluidAA_Rep.?", "") %>% str_replace("[A-Z]*_", "") ``` ## Beta-diversity overviews ### All together #### Hclust ```r= # vst transforming deseq_counts <- DESeqDataSetFromMatrix(count_tab, colData = sample_info_tab, design = ~treatment + experiment) deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts) vst_trans_count_tab <- assay(deseq_counts_vst) euc_dist <- dist(t(vst_trans_count_tab)) ## hclust euc_clust <- hclust(euc_dist, method="ward.D2") euc_dend <- as.dendrogram(euc_clust, hang=0.1) # color by treatment dend_cols <- as.character(sample_info_tab$treatment_color[order.dendrogram(euc_dend)]) labels_colors(euc_dend) <- dend_cols basal_col <- "#0D0887FF" flight_col <- "#9C179EFF" ground_col <- "#ED7953FF" plot(euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (N=48; colored by Treatment)") legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05) # color by experiment dend_cols <- as.character(sample_info_tab$experiment_color[order.dendrogram(euc_dend)]) labels_colors(euc_dend) <- dend_cols ISS_T_col <- "#ED7953FF" LAR_col <- "#9C179EFF" plot(euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (N=48); colored by Experiment") legend("topright", legend=c("ISS-T", "LAR"), fill=c(ISS_T_col, LAR_col), title="Experiment", inset=0.05) ``` <a href="https://i.imgur.com/GHLuEA1.png"><img src="https://i.imgur.com/GHLuEA1.png"></a> <a href="https://i.imgur.com/HHbJBXz.png"><img src="https://i.imgur.com/HHbJBXz.png"></a> #### Ordination ```r= ## making physeq object of vs-transformed tab vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T) sample_info_tab_phy <- sample_data(sample_info_tab) vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy) # generating and visualizing the PCoA with phyloseq vst_pcoa <- ordinate(vst_physeq, method="MDS", distance="euclidean") eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples # colored by treatment, shape by experiment plot_ordination(vst_physeq, vst_pcoa, shape="experiment") + geom_point(size=3, aes(color = treatment)) + scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=48)") + theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment", shape = "Experiment") ``` <a href="https://i.imgur.com/S7REZqy.png"><img src="https://i.imgur.com/S7REZqy.png"></a> ### ISS-T only ```R= ISS_T_samples <- sample_info_tab %>% filter(experiment == "ISS_T") %>% row.names() ISS_T_count_tab <- count_tab %>% select(all_of(ISS_T_samples)) ISS_T_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% ISS_T_samples, ] # vst transforming ISS_T_deseq_counts <- DESeqDataSetFromMatrix(ISS_T_count_tab, colData = ISS_T_sample_info_tab, design = ~treatment) ISS_T_deseq_counts_vst <- varianceStabilizingTransformation(ISS_T_deseq_counts) ISS_T_vst_trans_count_tab <- assay(ISS_T_deseq_counts_vst) ISS_T_euc_dist <- dist(t(ISS_T_vst_trans_count_tab)) # hclust ISS_T_euc_clust <- hclust(ISS_T_euc_dist, method="ward.D2") ISS_T_euc_dend <- as.dendrogram(ISS_T_euc_clust, hang=0.1) # color by treatment ISS_T_dend_cols <- as.character(ISS_T_sample_info_tab$treatment_color[order.dendrogram(ISS_T_euc_dend)]) labels_colors(ISS_T_euc_dend) <- ISS_T_dend_cols plot(ISS_T_euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (ISS-T only; N=25)") legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05) # ordination ## making physeq object of vs-transformed tab ISS_T_vst_count_phy <- otu_table(ISS_T_vst_trans_count_tab, taxa_are_rows=T) ISS_T_sample_info_tab_phy <- sample_data(ISS_T_sample_info_tab) ISS_T_vst_physeq <- phyloseq(ISS_T_vst_count_phy, ISS_T_sample_info_tab_phy) # generating and visualizing the PCoA with phyloseq ISS_T_vst_pcoa <- ordinate(ISS_T_vst_physeq, method="MDS", distance="euclidean") ISS_T_eigen_vals <- ISS_T_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples # colored by treatment, shape by experiment plot_ordination(ISS_T_vst_physeq, ISS_T_vst_pcoa) + geom_point(size=3, aes(color = treatment)) + scale_color_manual(values=unique(as.vector(ISS_T_sample_info_tab$treatment_color))) + coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; ISS-T only; N=25)") + theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment") ``` <a href="https://i.imgur.com/4KhoLqk.png"><img src="https://i.imgur.com/4KhoLqk.png"></a> <a href="https://i.imgur.com/yQ9xNxq.png"><img src="https://i.imgur.com/yQ9xNxq.png"></a> ### LAR only ```r= LAR_samples <- sample_info_tab %>% filter(experiment == "LAR") %>% row.names() LAR_count_tab <- count_tab %>% select(all_of(LAR_samples)) LAR_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% LAR_samples, ] # vst transforming LAR_deseq_counts <- DESeqDataSetFromMatrix(LAR_count_tab, colData = LAR_sample_info_tab, design = ~treatment) LAR_deseq_counts_vst <- varianceStabilizingTransformation(LAR_deseq_counts) LAR_vst_trans_count_tab <- assay(LAR_deseq_counts_vst) LAR_euc_dist <- dist(t(LAR_vst_trans_count_tab)) # hclust LAR_euc_clust <- hclust(LAR_euc_dist, method="ward.D2") LAR_euc_dend <- as.dendrogram(LAR_euc_clust, hang=0.1) # color by treatment LAR_dend_cols <- as.character(LAR_sample_info_tab$treatment_color[order.dendrogram(LAR_euc_dend)]) labels_colors(LAR_euc_dend) <- LAR_dend_cols plot(LAR_euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (LAR only; N=23)") legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05) # ordination ## making physeq object of vs-transformed tab LAR_vst_count_phy <- otu_table(LAR_vst_trans_count_tab, taxa_are_rows=T) LAR_sample_info_tab_phy <- sample_data(LAR_sample_info_tab) LAR_vst_physeq <- phyloseq(LAR_vst_count_phy, LAR_sample_info_tab_phy) # generating and visualizing the PCoA with phyloseq LAR_vst_pcoa <- ordinate(LAR_vst_physeq, method="MDS", distance="euclidean") LAR_eigen_vals <- LAR_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples # colored by treatment, shape by experiment plot_ordination(LAR_vst_physeq, LAR_vst_pcoa) + geom_point(size=3, aes(color = treatment), shape=17) + scale_color_manual(values=unique(as.vector(LAR_sample_info_tab$treatment_color))) + coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; LAR only; N=23)") + theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment") ``` <a href="https://i.imgur.com/R4IfEn9.png"><img src="https://i.imgur.com/R4IfEn9.png"></a> <a href="https://i.imgur.com/nItYZ6k.png"><img src="https://i.imgur.com/nItYZ6k.png"></a> ## Alpha-diversity overviews ### Rarefaction view ```r= rarecurve(t(count_tab), step=100, col=as.vector(sample_info_tab$treatment_color), lwd=2, ylab="ASVs", label=F, main="Rarefaction curve colored by treatment") legend("bottomright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05) rarecurve(t(count_tab), step=100, col=as.vector(sample_info_tab$experiment_color), lwd=2, ylab="ASVs", label=F, main="Rarefaction curve colored by experiment") legend("bottomright", legend=c("ISS-T", "LAR"), fill=c(ISS_T_col, LAR_col), title="Experiment", inset=0.05) ``` <a href="https://i.imgur.com/hw3OQbY.png"><img src="https://i.imgur.com/hw3OQbY.png"></a> <a href="https://i.imgur.com/5queIuQ.png"><img src="https://i.imgur.com/5queIuQ.png"></a> ### Diversity estimates ```r= ### richness/diversity estimates count_tab_phy <- otu_table(count_tab, taxa_are_rows=T) tax_tab_phy <- tax_table(as.matrix(tax_tab)) ASV_physeq <- phyloseq(count_tab_phy, tax_tab_phy, sample_info_tab_phy) # and now we can call the plot_richness() function on our phyloseq object plot_richness(ASV_physeq, color="treatment", shape="experiment", measures=c("Chao1", "Shannon")) + geom_point(size=3) + scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() + theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) plot_richness(ASV_physeq, x="treatment", shape="experiment", color="treatment", measures=c("Chao1", "Shannon")) + geom_point(size=3) + scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() + theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) plot_richness(ASV_physeq, x="experiment", shape="experiment", color="treatment", measures=c("Chao1", "Shannon")) + geom_point(size=3) + scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() + theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) ``` <a href="https://i.imgur.com/y95MIfq.png"><img src="https://i.imgur.com/y95MIfq.png"></a> <a href="https://i.imgur.com/omlpVo9.png"><img src="https://i.imgur.com/omlpVo9.png"></a> <a href="https://i.imgur.com/rdj5a36.png"><img src="https://i.imgur.com/rdj5a36.png"></a> ## Broad-level taxonomy (not useful at this level) ```r= # using phyloseq to make a count table that has summed all ASVs # that were in the same phylum phyla_counts_tab <- otu_table(tax_glom(ASV_physeq, taxrank="phylum")) # making a vector of phyla names to set as row names phyla_tax_vec <- as.vector(tax_table(tax_glom(ASV_physeq, taxrank="phylum"))[,2]) rownames(phyla_counts_tab) <- as.vector(phyla_tax_vec) # we also have to account for sequences that weren't assigned any # taxonomy even at the phylum level # these came into R as 'NAs' in the taxonomy table, but their counts are # still in the count table # so we can get that value for each sample by substracting the column sums # of this new table (that has everything that had a phylum assigned to it) # from the column sums of the starting count table (that has all # representative sequences) unclassified_tax_counts <- colSums(count_tab) - colSums(phyla_counts_tab) # and we'll add this row to our phylum count table: phyla_and_unidentified_counts_tab <- rbind(phyla_counts_tab, "Unclassified"=unclassified_tax_counts) # should be "TRUE" identical(colSums(phyla_and_unidentified_counts_tab), colSums(count_tab)) # now we'll generate a proportions table for summarizing: major_taxa_proportions_tab <- data.frame(apply(phyla_and_unidentified_counts_tab, 2, function(x) x/sum(x)*100), check.names=FALSE) # making a copy of our table that's safe for manipulating major_taxa_proportions_tab_for_plot <- major_taxa_proportions_tab # and add a column of the taxa names so that it is within the table, rather # than just as row names (this makes working with ggplot easier) major_taxa_proportions_tab_for_plot$Phylum <- row.names(major_taxa_proportions_tab_for_plot) # now we'll transform the table into "narrow", also called "long", format (also makes # plotting easier) major_taxa_proportions_tab_for_plot.g <- gather(major_taxa_proportions_tab_for_plot, Sample, Proportion, -Phylum) # here we're making a new table by pulling what we want from the sample # information table sample_info_for_merge<-data.frame("Sample"=row.names(sample_info_tab), "treatment"=sample_info_tab$treatment, "experiment"=sample_info_tab$experiment, "treatment_color"=sample_info_tab$treatment_color, "experiment_color"=sample_info_tab$experiment_color, stringsAsFactors=F) major_taxa_proportions_tab_for_plot.g2 <- merge(major_taxa_proportions_tab_for_plot.g, sample_info_for_merge) # and now we're ready to make some summary figures with our wonderfully # constructed table ## a good color scheme can be hard to find, i included the viridis package ## here because it's color-blind friendly and sometimes it's been really ## helpful for me, though this is not demonstrated in all of the following :/ # one common way to look at this is with stacked bar charts for each taxon per sample: ggplot(major_taxa_proportions_tab_for_plot.g2, aes(x=Sample, y=Proportion, fill=Phylum)) + geom_bar(width=0.6, stat="identity") + theme_bw() + scale_fill_viridis(discrete=TRUE) + theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) + labs(x="Sample", y="% of 16S rRNA V4 copies recovered", title="All samples") ####### ### tax is useless at this broad of a level, could breakdown bacteroidota and firmicutes when i have time, want to get to some stats though # Bacteroidota and Firmicutes were by far the most represented, so seeing how many classes there are of each tax_tab %>% filter(phylum == "Bacteroidota") %>% pull(class) %>% as.vector() %>% unique() # Bacteroidia tax_tab %>% filter(phylum == "Firmicutes") %>% pull(class) %>% as.vector() %>% unique() # Clostridia, Bacilli # checking how many orders of each tax_tab %>% filter(phylum == "Bacteroidota") %>% pull(order) %>% as.vector() %>% unique() # Bacteroidia, Flavobacteriales tax_tab %>% filter(phylum == "Firmicutes") %>% pull(order) %>% as.vector() %>% unique() # Clostridia, Bacilli ####### ``` <a href="https://i.imgur.com/VWTxAdm.png"><img src="https://i.imgur.com/VWTxAdm.png"></a> ## Whole-community level testing for differences ### ISS-T ```r= anova(betadisper(ISS_T_euc_dist, ISS_T_sample_info_tab$treatment)) # 0.01 # difference between within-group dispersions, can't do permanova on this # removing basal group ISS_T_no_basal_samples <- sample_info_tab %>% filter(experiment == "ISS_T", treatment != "BSL") %>% row.names() ISS_T_no_basal_count_tab <- count_tab %>% select(all_of(ISS_T_no_basal_samples)) ISS_T_no_basal_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% ISS_T_no_basal_samples, ] # transforming and getting distance # vst transforming ISS_T_no_basal_deseq_counts <- DESeqDataSetFromMatrix(ISS_T_no_basal_count_tab, colData = ISS_T_no_basal_sample_info_tab, design = ~treatment) ISS_T_no_basal_deseq_counts_vst <- varianceStabilizingTransformation(ISS_T_no_basal_deseq_counts) ISS_T_no_basal_vst_trans_count_tab <- assay(ISS_T_no_basal_deseq_counts_vst) ISS_T_no_basal_euc_dist <- dist(t(ISS_T_no_basal_vst_trans_count_tab)) # betadisper anova(betadisper(ISS_T_no_basal_euc_dist, ISS_T_no_basal_sample_info_tab$treatment)) # good, 0.6 # permanova adonis(ISS_T_no_basal_euc_dist~ISS_T_no_basal_sample_info_tab$treatment) # 0.001, sig diff between flight and ground # making ordination of just ISS-T flight vs ground ISS_T_no_basal_vst_count_phy <- otu_table(ISS_T_no_basal_vst_trans_count_tab, taxa_are_rows=TRUE) ISS_T_no_basal_sample_info_tab_phy <- sample_data(ISS_T_no_basal_sample_info_tab) ISS_T_no_basal_vst_physeq <- phyloseq(ISS_T_no_basal_vst_count_phy, ISS_T_no_basal_sample_info_tab_phy) # generating and visualizing the PCoA with phyloseq ISS_T_no_basal_vst_pcoa <- ordinate(ISS_T_no_basal_vst_physeq, method="MDS", distance="euclidean") ISS_T_no_basal_vst_eigen_vals <- ISS_T_no_basal_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples # and making our new ordination of just basalts with our adonis statistic plot_ordination(ISS_T_no_basal_vst_physeq, ISS_T_no_basal_vst_pcoa) + geom_point(size=3, aes(color = treatment)) + scale_color_manual(values=unique(as.vector(ISS_T_no_basal_sample_info_tab$treatment_color))) + annotate("text", x=-15, y=13.5, label="ISS-T, FLT vs GC", fontface = 2) + annotate("text", x=-15, y=12, label="Permutational ANOVA = 0.001", fontface = 2) + coord_fixed(sqrt(ISS_T_no_basal_vst_eigen_vals[2]/ISS_T_no_basal_vst_eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=16)") + theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment") ``` <a href="https://i.imgur.com/xMOCqEN.png"><img src="https://i.imgur.com/xMOCqEN.png"></a> ### LAR ```r= anova(betadisper(LAR_euc_dist, LAR_sample_info_tab$treatment)) # 0.27 adonis(LAR_euc_dist~LAR_sample_info_tab$treatment) # 0.001, at least one group is diff from the others at 0.001 # but for now removing basal group to look the same as above LAR_no_basal_samples <- sample_info_tab %>% filter(experiment == "LAR", treatment != "BSL") %>% row.names() LAR_no_basal_count_tab <- count_tab %>% select(all_of(LAR_no_basal_samples)) LAR_no_basal_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% LAR_no_basal_samples, ] # transforming and getting distance # vst transforming LAR_no_basal_deseq_counts <- DESeqDataSetFromMatrix(LAR_no_basal_count_tab, colData = LAR_no_basal_sample_info_tab, design = ~treatment) LAR_no_basal_deseq_counts_vst <- varianceStabilizingTransformation(LAR_no_basal_deseq_counts) LAR_no_basal_vst_trans_count_tab <- assay(LAR_no_basal_deseq_counts_vst) LAR_no_basal_euc_dist <- dist(t(LAR_no_basal_vst_trans_count_tab)) # betadisper anova(betadisper(LAR_no_basal_euc_dist, LAR_no_basal_sample_info_tab$treatment)) # good, 0.6 # permanova adonis(LAR_no_basal_euc_dist~LAR_no_basal_sample_info_tab$treatment) # 0.001, sig diff between LAR flight and ground too # making ordination of just LAR flight vs ground LAR_no_basal_vst_count_phy <- otu_table(LAR_no_basal_vst_trans_count_tab, taxa_are_rows=TRUE) LAR_no_basal_sample_info_tab_phy <- sample_data(LAR_no_basal_sample_info_tab) LAR_no_basal_vst_physeq <- phyloseq(LAR_no_basal_vst_count_phy, LAR_no_basal_sample_info_tab_phy) # generating and visualizing the PCoA with phyloseq LAR_no_basal_vst_pcoa <- ordinate(LAR_no_basal_vst_physeq, method="MDS", distance="euclidean") LAR_no_basal_vst_eigen_vals <- LAR_no_basal_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples # and making our new ordination of just basalts with our adonis statistic plot_ordination(LAR_no_basal_vst_physeq, LAR_no_basal_vst_pcoa) + geom_point(size=3, aes(color = treatment)) + scale_color_manual(values=unique(as.vector(LAR_no_basal_sample_info_tab$treatment_color))) + annotate("text", x=20, y=26.5, label="LAR, FLT vs GC", fontface = 2) + annotate("text", x=20, y=23.5, label="Permutational ANOVA = 0.001", fontface = 2) + coord_fixed(sqrt(LAR_no_basal_vst_eigen_vals[2]/LAR_no_basal_vst_eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=16)") + theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment") ``` <a href="https://i.imgur.com/Zqkr6tD.png"><img src="https://i.imgur.com/Zqkr6tD.png"></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a> <a href=""><img src=""></a>