---
tags: GeneLab
---
# RR6, GLDS-249, V4 amplicon data
[toc]
# Processing
Data available at [GLDS-249](https://genelab-data.ndc.nasa.gov/genelab/accession/GLDS-249/) (these samples are labeled FluidAA).
Processed with [this Snakefile](https://github.com/AstrobioMike/mock-temp/blob/master/Amplicon/Illumina/Snakefile) that calls [this RScript](https://github.com/AstrobioMike/mock-temp/blob/master/Amplicon/Illumina/full-R-processing.R).
## Setup R env
```r=
library("phyloseq")
library("vegan")
library("DESeq2")
library("ggplot2")
library("dendextend")
library("tidyr")
library("viridis")
library("reshape")
library("tidyverse")
## largely following along with my page here: https://astrobiomike.github.io/amplicon/dada2_workflow_ex
## see there for more discussion on anything
## reading in data files
count_tab <- read.table("../Final_Outputs/counts.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t")
tax_tab <- read.table("../Final_Outputs/taxonomy.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t", na.strings="NA")
sample_info_tab <- read.table("sample-info.tsv", header=TRUE, row.names=1, check.names=FALSE, sep="\t", comment.char="")
## shortening sample names to remove any redundant info
names(count_tab) <- names(count_tab) %>% str_replace("_FluidAA_Rep.?", "") %>% str_replace("[A-Z]*_", "")
row.names(sample_info_tab) <- row.names(sample_info_tab) %>% str_replace("_FluidAA_Rep.?", "") %>% str_replace("[A-Z]*_", "")
```
## Beta-diversity overviews
### All together
#### Hclust
```r=
# vst transforming
deseq_counts <- DESeqDataSetFromMatrix(count_tab, colData = sample_info_tab, design = ~treatment + experiment)
deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts)
vst_trans_count_tab <- assay(deseq_counts_vst)
euc_dist <- dist(t(vst_trans_count_tab))
## hclust
euc_clust <- hclust(euc_dist, method="ward.D2")
euc_dend <- as.dendrogram(euc_clust, hang=0.1)
# color by treatment
dend_cols <- as.character(sample_info_tab$treatment_color[order.dendrogram(euc_dend)])
labels_colors(euc_dend) <- dend_cols
basal_col <- "#0D0887FF"
flight_col <- "#9C179EFF"
ground_col <- "#ED7953FF"
plot(euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (N=48; colored by Treatment)")
legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05)
# color by experiment
dend_cols <- as.character(sample_info_tab$experiment_color[order.dendrogram(euc_dend)])
labels_colors(euc_dend) <- dend_cols
ISS_T_col <- "#ED7953FF"
LAR_col <- "#9C179EFF"
plot(euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (N=48); colored by Experiment")
legend("topright", legend=c("ISS-T", "LAR"), fill=c(ISS_T_col, LAR_col), title="Experiment", inset=0.05)
```
<a href="https://i.imgur.com/GHLuEA1.png"><img src="https://i.imgur.com/GHLuEA1.png"></a>
<a href="https://i.imgur.com/HHbJBXz.png"><img src="https://i.imgur.com/HHbJBXz.png"></a>
#### Ordination
```r=
## making physeq object of vs-transformed tab
vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_tab)
vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)
# generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method="MDS", distance="euclidean")
eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
# colored by treatment, shape by experiment
plot_ordination(vst_physeq, vst_pcoa, shape="experiment") + geom_point(size=3, aes(color = treatment)) +
scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=48)") +
theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment", shape = "Experiment")
```
<a href="https://i.imgur.com/S7REZqy.png"><img src="https://i.imgur.com/S7REZqy.png"></a>
### ISS-T only
```R=
ISS_T_samples <- sample_info_tab %>% filter(experiment == "ISS_T") %>% row.names()
ISS_T_count_tab <- count_tab %>% select(all_of(ISS_T_samples))
ISS_T_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% ISS_T_samples, ]
# vst transforming
ISS_T_deseq_counts <- DESeqDataSetFromMatrix(ISS_T_count_tab, colData = ISS_T_sample_info_tab, design = ~treatment)
ISS_T_deseq_counts_vst <- varianceStabilizingTransformation(ISS_T_deseq_counts)
ISS_T_vst_trans_count_tab <- assay(ISS_T_deseq_counts_vst)
ISS_T_euc_dist <- dist(t(ISS_T_vst_trans_count_tab))
# hclust
ISS_T_euc_clust <- hclust(ISS_T_euc_dist, method="ward.D2")
ISS_T_euc_dend <- as.dendrogram(ISS_T_euc_clust, hang=0.1)
# color by treatment
ISS_T_dend_cols <- as.character(ISS_T_sample_info_tab$treatment_color[order.dendrogram(ISS_T_euc_dend)])
labels_colors(ISS_T_euc_dend) <- ISS_T_dend_cols
plot(ISS_T_euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (ISS-T only; N=25)")
legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05)
# ordination
## making physeq object of vs-transformed tab
ISS_T_vst_count_phy <- otu_table(ISS_T_vst_trans_count_tab, taxa_are_rows=T)
ISS_T_sample_info_tab_phy <- sample_data(ISS_T_sample_info_tab)
ISS_T_vst_physeq <- phyloseq(ISS_T_vst_count_phy, ISS_T_sample_info_tab_phy)
# generating and visualizing the PCoA with phyloseq
ISS_T_vst_pcoa <- ordinate(ISS_T_vst_physeq, method="MDS", distance="euclidean")
ISS_T_eigen_vals <- ISS_T_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
# colored by treatment, shape by experiment
plot_ordination(ISS_T_vst_physeq, ISS_T_vst_pcoa) + geom_point(size=3, aes(color = treatment)) +
scale_color_manual(values=unique(as.vector(ISS_T_sample_info_tab$treatment_color))) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; ISS-T only; N=25)") +
theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment")
```
<a href="https://i.imgur.com/4KhoLqk.png"><img src="https://i.imgur.com/4KhoLqk.png"></a>
<a href="https://i.imgur.com/yQ9xNxq.png"><img src="https://i.imgur.com/yQ9xNxq.png"></a>
### LAR only
```r=
LAR_samples <- sample_info_tab %>% filter(experiment == "LAR") %>% row.names()
LAR_count_tab <- count_tab %>% select(all_of(LAR_samples))
LAR_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% LAR_samples, ]
# vst transforming
LAR_deseq_counts <- DESeqDataSetFromMatrix(LAR_count_tab, colData = LAR_sample_info_tab, design = ~treatment)
LAR_deseq_counts_vst <- varianceStabilizingTransformation(LAR_deseq_counts)
LAR_vst_trans_count_tab <- assay(LAR_deseq_counts_vst)
LAR_euc_dist <- dist(t(LAR_vst_trans_count_tab))
# hclust
LAR_euc_clust <- hclust(LAR_euc_dist, method="ward.D2")
LAR_euc_dend <- as.dendrogram(LAR_euc_clust, hang=0.1)
# color by treatment
LAR_dend_cols <- as.character(LAR_sample_info_tab$treatment_color[order.dendrogram(LAR_euc_dend)])
labels_colors(LAR_euc_dend) <- LAR_dend_cols
plot(LAR_euc_dend, ylab="VST Euc. dist.", main="V4 amplicons (LAR only; N=23)")
legend("topright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05)
# ordination
## making physeq object of vs-transformed tab
LAR_vst_count_phy <- otu_table(LAR_vst_trans_count_tab, taxa_are_rows=T)
LAR_sample_info_tab_phy <- sample_data(LAR_sample_info_tab)
LAR_vst_physeq <- phyloseq(LAR_vst_count_phy, LAR_sample_info_tab_phy)
# generating and visualizing the PCoA with phyloseq
LAR_vst_pcoa <- ordinate(LAR_vst_physeq, method="MDS", distance="euclidean")
LAR_eigen_vals <- LAR_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
# colored by treatment, shape by experiment
plot_ordination(LAR_vst_physeq, LAR_vst_pcoa) + geom_point(size=3, aes(color = treatment), shape=17) +
scale_color_manual(values=unique(as.vector(LAR_sample_info_tab$treatment_color))) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; LAR only; N=23)") +
theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment")
```
<a href="https://i.imgur.com/R4IfEn9.png"><img src="https://i.imgur.com/R4IfEn9.png"></a>
<a href="https://i.imgur.com/nItYZ6k.png"><img src="https://i.imgur.com/nItYZ6k.png"></a>
## Alpha-diversity overviews
### Rarefaction view
```r=
rarecurve(t(count_tab), step=100, col=as.vector(sample_info_tab$treatment_color), lwd=2, ylab="ASVs", label=F, main="Rarefaction curve colored by treatment")
legend("bottomright", legend=c("Basal", "Flight", "Ground"), fill=c(basal_col, flight_col, ground_col), title="Treatment", inset=0.05)
rarecurve(t(count_tab), step=100, col=as.vector(sample_info_tab$experiment_color), lwd=2, ylab="ASVs", label=F, main="Rarefaction curve colored by experiment")
legend("bottomright", legend=c("ISS-T", "LAR"), fill=c(ISS_T_col, LAR_col), title="Experiment", inset=0.05)
```
<a href="https://i.imgur.com/hw3OQbY.png"><img src="https://i.imgur.com/hw3OQbY.png"></a>
<a href="https://i.imgur.com/5queIuQ.png"><img src="https://i.imgur.com/5queIuQ.png"></a>
### Diversity estimates
```r=
### richness/diversity estimates
count_tab_phy <- otu_table(count_tab, taxa_are_rows=T)
tax_tab_phy <- tax_table(as.matrix(tax_tab))
ASV_physeq <- phyloseq(count_tab_phy, tax_tab_phy, sample_info_tab_phy)
# and now we can call the plot_richness() function on our phyloseq object
plot_richness(ASV_physeq, color="treatment", shape="experiment", measures=c("Chao1", "Shannon")) + geom_point(size=3) +
scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() +
theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))
plot_richness(ASV_physeq, x="treatment", shape="experiment", color="treatment", measures=c("Chao1", "Shannon")) + geom_point(size=3) +
scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() +
theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))
plot_richness(ASV_physeq, x="experiment", shape="experiment", color="treatment", measures=c("Chao1", "Shannon")) + geom_point(size=3) +
scale_color_manual(values=unique(as.vector(sample_info_tab$treatment_color))) + theme_bw() +
theme(legend.title = element_blank(), strip.text = element_text(face = "bold"), axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))
```
<a href="https://i.imgur.com/y95MIfq.png"><img src="https://i.imgur.com/y95MIfq.png"></a>
<a href="https://i.imgur.com/omlpVo9.png"><img src="https://i.imgur.com/omlpVo9.png"></a>
<a href="https://i.imgur.com/rdj5a36.png"><img src="https://i.imgur.com/rdj5a36.png"></a>
## Broad-level taxonomy (not useful at this level)
```r=
# using phyloseq to make a count table that has summed all ASVs
# that were in the same phylum
phyla_counts_tab <- otu_table(tax_glom(ASV_physeq, taxrank="phylum"))
# making a vector of phyla names to set as row names
phyla_tax_vec <- as.vector(tax_table(tax_glom(ASV_physeq, taxrank="phylum"))[,2])
rownames(phyla_counts_tab) <- as.vector(phyla_tax_vec)
# we also have to account for sequences that weren't assigned any
# taxonomy even at the phylum level
# these came into R as 'NAs' in the taxonomy table, but their counts are
# still in the count table
# so we can get that value for each sample by substracting the column sums
# of this new table (that has everything that had a phylum assigned to it)
# from the column sums of the starting count table (that has all
# representative sequences)
unclassified_tax_counts <- colSums(count_tab) - colSums(phyla_counts_tab)
# and we'll add this row to our phylum count table:
phyla_and_unidentified_counts_tab <- rbind(phyla_counts_tab, "Unclassified"=unclassified_tax_counts)
# should be "TRUE"
identical(colSums(phyla_and_unidentified_counts_tab), colSums(count_tab))
# now we'll generate a proportions table for summarizing:
major_taxa_proportions_tab <- data.frame(apply(phyla_and_unidentified_counts_tab, 2, function(x) x/sum(x)*100), check.names=FALSE)
# making a copy of our table that's safe for manipulating
major_taxa_proportions_tab_for_plot <- major_taxa_proportions_tab
# and add a column of the taxa names so that it is within the table, rather
# than just as row names (this makes working with ggplot easier)
major_taxa_proportions_tab_for_plot$Phylum <- row.names(major_taxa_proportions_tab_for_plot)
# now we'll transform the table into "narrow", also called "long", format (also makes
# plotting easier)
major_taxa_proportions_tab_for_plot.g <- gather(major_taxa_proportions_tab_for_plot, Sample, Proportion, -Phylum)
# here we're making a new table by pulling what we want from the sample
# information table
sample_info_for_merge<-data.frame("Sample"=row.names(sample_info_tab), "treatment"=sample_info_tab$treatment, "experiment"=sample_info_tab$experiment, "treatment_color"=sample_info_tab$treatment_color, "experiment_color"=sample_info_tab$experiment_color, stringsAsFactors=F)
major_taxa_proportions_tab_for_plot.g2 <- merge(major_taxa_proportions_tab_for_plot.g, sample_info_for_merge)
# and now we're ready to make some summary figures with our wonderfully
# constructed table
## a good color scheme can be hard to find, i included the viridis package
## here because it's color-blind friendly and sometimes it's been really
## helpful for me, though this is not demonstrated in all of the following :/
# one common way to look at this is with stacked bar charts for each taxon per sample:
ggplot(major_taxa_proportions_tab_for_plot.g2, aes(x=Sample, y=Proportion, fill=Phylum)) +
geom_bar(width=0.6, stat="identity") + theme_bw() + scale_fill_viridis(discrete=TRUE) +
theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) +
labs(x="Sample", y="% of 16S rRNA V4 copies recovered", title="All samples")
#######
### tax is useless at this broad of a level, could breakdown bacteroidota and firmicutes when i have time, want to get to some stats though
# Bacteroidota and Firmicutes were by far the most represented, so seeing how many classes there are of each
tax_tab %>% filter(phylum == "Bacteroidota") %>% pull(class) %>% as.vector() %>% unique()
# Bacteroidia
tax_tab %>% filter(phylum == "Firmicutes") %>% pull(class) %>% as.vector() %>% unique()
# Clostridia, Bacilli
# checking how many orders of each
tax_tab %>% filter(phylum == "Bacteroidota") %>% pull(order) %>% as.vector() %>% unique()
# Bacteroidia, Flavobacteriales
tax_tab %>% filter(phylum == "Firmicutes") %>% pull(order) %>% as.vector() %>% unique()
# Clostridia, Bacilli
#######
```
<a href="https://i.imgur.com/VWTxAdm.png"><img src="https://i.imgur.com/VWTxAdm.png"></a>
## Whole-community level testing for differences
### ISS-T
```r=
anova(betadisper(ISS_T_euc_dist, ISS_T_sample_info_tab$treatment)) # 0.01
# difference between within-group dispersions, can't do permanova on this
# removing basal group
ISS_T_no_basal_samples <- sample_info_tab %>% filter(experiment == "ISS_T", treatment != "BSL") %>% row.names()
ISS_T_no_basal_count_tab <- count_tab %>% select(all_of(ISS_T_no_basal_samples))
ISS_T_no_basal_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% ISS_T_no_basal_samples, ]
# transforming and getting distance
# vst transforming
ISS_T_no_basal_deseq_counts <- DESeqDataSetFromMatrix(ISS_T_no_basal_count_tab, colData = ISS_T_no_basal_sample_info_tab, design = ~treatment)
ISS_T_no_basal_deseq_counts_vst <- varianceStabilizingTransformation(ISS_T_no_basal_deseq_counts)
ISS_T_no_basal_vst_trans_count_tab <- assay(ISS_T_no_basal_deseq_counts_vst)
ISS_T_no_basal_euc_dist <- dist(t(ISS_T_no_basal_vst_trans_count_tab))
# betadisper
anova(betadisper(ISS_T_no_basal_euc_dist, ISS_T_no_basal_sample_info_tab$treatment)) # good, 0.6
# permanova
adonis(ISS_T_no_basal_euc_dist~ISS_T_no_basal_sample_info_tab$treatment) # 0.001, sig diff between flight and ground
# making ordination of just ISS-T flight vs ground
ISS_T_no_basal_vst_count_phy <- otu_table(ISS_T_no_basal_vst_trans_count_tab, taxa_are_rows=TRUE)
ISS_T_no_basal_sample_info_tab_phy <- sample_data(ISS_T_no_basal_sample_info_tab)
ISS_T_no_basal_vst_physeq <- phyloseq(ISS_T_no_basal_vst_count_phy, ISS_T_no_basal_sample_info_tab_phy)
# generating and visualizing the PCoA with phyloseq
ISS_T_no_basal_vst_pcoa <- ordinate(ISS_T_no_basal_vst_physeq, method="MDS", distance="euclidean")
ISS_T_no_basal_vst_eigen_vals <- ISS_T_no_basal_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
# and making our new ordination of just basalts with our adonis statistic
plot_ordination(ISS_T_no_basal_vst_physeq, ISS_T_no_basal_vst_pcoa) + geom_point(size=3, aes(color = treatment)) +
scale_color_manual(values=unique(as.vector(ISS_T_no_basal_sample_info_tab$treatment_color))) +
annotate("text", x=-15, y=13.5, label="ISS-T, FLT vs GC", fontface = 2) +
annotate("text", x=-15, y=12, label="Permutational ANOVA = 0.001", fontface = 2) +
coord_fixed(sqrt(ISS_T_no_basal_vst_eigen_vals[2]/ISS_T_no_basal_vst_eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=16)") +
theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment")
```
<a href="https://i.imgur.com/xMOCqEN.png"><img src="https://i.imgur.com/xMOCqEN.png"></a>
### LAR
```r=
anova(betadisper(LAR_euc_dist, LAR_sample_info_tab$treatment)) # 0.27
adonis(LAR_euc_dist~LAR_sample_info_tab$treatment) # 0.001, at least one group is diff from the others at 0.001
# but for now removing basal group to look the same as above
LAR_no_basal_samples <- sample_info_tab %>% filter(experiment == "LAR", treatment != "BSL") %>% row.names()
LAR_no_basal_count_tab <- count_tab %>% select(all_of(LAR_no_basal_samples))
LAR_no_basal_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% LAR_no_basal_samples, ]
# transforming and getting distance
# vst transforming
LAR_no_basal_deseq_counts <- DESeqDataSetFromMatrix(LAR_no_basal_count_tab, colData = LAR_no_basal_sample_info_tab, design = ~treatment)
LAR_no_basal_deseq_counts_vst <- varianceStabilizingTransformation(LAR_no_basal_deseq_counts)
LAR_no_basal_vst_trans_count_tab <- assay(LAR_no_basal_deseq_counts_vst)
LAR_no_basal_euc_dist <- dist(t(LAR_no_basal_vst_trans_count_tab))
# betadisper
anova(betadisper(LAR_no_basal_euc_dist, LAR_no_basal_sample_info_tab$treatment)) # good, 0.6
# permanova
adonis(LAR_no_basal_euc_dist~LAR_no_basal_sample_info_tab$treatment) # 0.001, sig diff between LAR flight and ground too
# making ordination of just LAR flight vs ground
LAR_no_basal_vst_count_phy <- otu_table(LAR_no_basal_vst_trans_count_tab, taxa_are_rows=TRUE)
LAR_no_basal_sample_info_tab_phy <- sample_data(LAR_no_basal_sample_info_tab)
LAR_no_basal_vst_physeq <- phyloseq(LAR_no_basal_vst_count_phy, LAR_no_basal_sample_info_tab_phy)
# generating and visualizing the PCoA with phyloseq
LAR_no_basal_vst_pcoa <- ordinate(LAR_no_basal_vst_physeq, method="MDS", distance="euclidean")
LAR_no_basal_vst_eigen_vals <- LAR_no_basal_vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
# and making our new ordination of just basalts with our adonis statistic
plot_ordination(LAR_no_basal_vst_physeq, LAR_no_basal_vst_pcoa) + geom_point(size=3, aes(color = treatment)) +
scale_color_manual(values=unique(as.vector(LAR_no_basal_sample_info_tab$treatment_color))) +
annotate("text", x=20, y=26.5, label="LAR, FLT vs GC", fontface = 2) +
annotate("text", x=20, y=23.5, label="Permutational ANOVA = 0.001", fontface = 2) +
coord_fixed(sqrt(LAR_no_basal_vst_eigen_vals[2]/LAR_no_basal_vst_eigen_vals[1])) + ggtitle("PCoA (VST Euc. dist.; N=16)") +
theme_bw() + theme(legend.position="bottom") + labs(color = "Treatment")
```
<a href="https://i.imgur.com/Zqkr6tD.png"><img src="https://i.imgur.com/Zqkr6tD.png"></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>
<a href=""><img src=""></a>