---
tags: Aquaponics
title: Diversity biz (26-May-2021)
---
# Diversity biz
# Summary
---
> **Nothing significantly different based on estimated shannon diversity at the family-level taxonomic classifications we have (that's the lowest taxonomic resolution we have).**
---
[toc]
# Setting up
```R
library(phyloseq)
library(DivNet)
library(tidyverse)
library(magrittr)
theme_set(theme_bw)
# helper functions for mass running things
make_div_obj <- function(phy_obj, wanted_formula, cores = 1) {
# running divnet
div_obj <- phy_obj %>% divnet(formula = wanted_formula, ncores = cores)
return(div_obj)
}
run_div_test <- function(phy_obj, div_obj, target_ref = NULL) {
# getting covariate
target_covariate <- attributes(div_obj$X)$contrasts %>% names
# getting single estimate and se for each treatment to give to betta function
# getting sample info tab alone
sample_tab <- phy_obj %>% sample_data %>% data.frame
# getting unique treatment types
unique_treatment_types <- sample_tab %>% pull(target_covariate) %>% unique %>% as.character
unique_samples_needed <- c()
for ( type in unique_treatment_types ) {
sample_to_add <- sample_tab %>% filter(!!as.symbol(target_covariate) == !!type) %>% head(1) %>% row.names()
unique_samples_needed <- c(unique_samples_needed, sample_to_add)
}
# subsetting divnet object to one per unique type
sub_div_obj_shannon <- div_obj$shannon[unique_samples_needed]
sub_div_obj_shannon_variance <- div_obj$`shannon-variance`[unique_samples_needed]
# getting estimates and standard errors for one entry for each type
estimates <- c()
ses <- c()
for ( i in 1:length(unique_samples_needed) ) {
estimates <- c(estimates, unlist(sub_div_obj_shannon[i])[1] %>% as.numeric())
ses <- c(ses, sqrt(sub_div_obj_shannon_variance[i]))
}
# making subset sample tab
sub_sample_tab <- sample_tab[unique_samples_needed, ]
# setting up matrix
predictors_ <- sub_sample_tab[, names(sub_sample_tab) %in% target_covariate]
# there is no built-in way to change the baseline in the contrasts, so modifying the factor levels as suggested on divnet issue here: https://github.com/adw96/DivNet/issues/37
if ( is.null(target_ref) ) {
X <- model.matrix(~predictors_)
baseline <- predictors_ %>% levels %>% head(1)
} else {
predictors_ <- predictors_ %>% relevel(ref = target_ref)
baseline <- predictors_ %>% levels %>% head(1)
X <- model.matrix(~predictors_)
}
res <- betta(estimates, ses, X)
cat("With '", baseline, "' as the baseline:\n\n", sep = "")
tab <- res$table %>% data.frame
# p-values are calculated as: 2*(1-pnorm(abs(Estimate/SE))) (see betta function)
# I think getting 0s is just due to how many digits R is working with. It seems
# pnorm(5.32672) = 0.9999999 and pnorm(5.32673) = 1. So if our abs(Estimate/SE)
# is greater than 5.32672, we’re going to get a p-value of 0. So setting 0's to
# < 1e-8 for what is printed out
new_p_vec <- vector()
for ( value in tab$p.values ) {
if ( value == 0 ) {
new_p_vec <- c(new_p_vec, "< 1e-8")
} else {
new_p_vec <- c(new_p_vec, value)
}
}
tab$p.values <- new_p_vec
print(tab)
return(res)
}
plot_DivNet_diversity <- function(phy_obj, div_obj, rank = "Family") {
# getting covariate
target_covariate <- attributes(div_obj$X)$contrasts %>% names
# getting sample table from phyloseq object
sample_tab <- phy_obj %>% sample_data %>% data.frame
# getting unique treatment types
unique_treatment_types <- sample_tab %>% pull(target_covariate) %>% unique %>% as.character
unique_samples_needed <- c()
for ( type in unique_treatment_types ) {
sample_to_add <- sample_tab %>% filter(!!as.symbol(target_covariate) == type) %>% head(1) %>% row.names()
unique_samples_needed <- c(unique_samples_needed, sample_to_add)
}
sub_div_obj_shannon <- div_obj$shannon[c(unique_samples_needed)]
div_est_vec <- c()
ci_vec <- c()
for ( i in 1:length(unique_samples_needed) ) {
div_est_vec <- c(div_est_vec, unlist(sub_div_obj_shannon[i])[1] %>% as.numeric())
ci_vec <- c(ci_vec, unlist(sub_div_obj_shannon[i])[2] %>% as.numeric() * 2)
}
sub_plot_tab <- data.frame(target_covariate = unique_treatment_types, shannon = div_est_vec, ci = ci_vec)
# getting max value for plot y-axis
# max_y <- max(sub_plot_tab$shannon + sub_plot_tab$ci) %>% ceiling
y_axis_label = paste0("Shannon diversity estimate\n(", rank, " level)")
plot <- ggplot(sub_plot_tab, aes(x = target_covariate, y = shannon, color = target_covariate)) +
geom_errorbar(aes(ymin = shannon - ci, ymax = shannon + ci), width = 0) +
geom_point() + theme_bw() +
xlab(target_covariate) + ylab(y_axis_label) +
theme(legend.position = "none") # + coord_cartesian(ylim = c(0,max_y))
return(plot)
}
```
```
# reading in our data (excluding EM1 and Lambda)
otu_counts_tab <- read.table("combined_otu_counts_tab.txt", header=TRUE, row.names=1, sep="\t")[,c(1:30)]
taxonomy_tab <- read.table("combined_taxonomy_tab.txt", header=TRUE, row.names=1, sep="\t", na.strings="<NA>")
sample_info_tab <- read.table("sample_info_tab.txt", header=T, check.names=F, row.names=1, sep="\t")[c(1:30),]
# making phyloseq object
counts_phy <- otu_table(otu_counts_tab, taxa_are_rows=TRUE)
tax_phy <- tax_table(as.matrix(taxonomy_tab))
sample_phy <- sample_data(sample_info_tab)
aqua_phy <- phyloseq(counts_phy, tax_phy, sample_phy)
# grouping at family rank
aqua_family_phy <- tax_glom(aqua_phy, taxrank = "Family")
```
# Based on timepoint
```
time_point_div_obj <- make_div_obj(aqua_family_phy, ~Time_point, cores = 4)
time_point_T1_base_div_res <- run_div_test(aqua_family_phy, time_point_div_obj)
```
## Not Sig (p = 0.307)
```
With 'T1' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.07915048 0.1057571 < 1e-8
predictors_T2 0.01151054 0.1735481 0.947
predictors_T3 -0.23486316 0.2413842 0.331
```
```
time_point_T2_base_div_res <- run_div_test(aqua_family_phy, time_point_div_obj, target_ref = "T2")
```
```
With 'T2' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.09066102 0.1057571 < 1e-8
predictors_T1 -0.01151054 0.1600364 0.943
predictors_T3 -0.24637370 0.2413842 0.307
```
## Plotting
```
plot_DivNet_diversity(aqua_family_phy, time_point_div_obj)
```
<a href="https://i.imgur.com/1HVkXIv.png"><img src="https://i.imgur.com/1HVkXIv.png"></a>
---
# Based on lighting
```
lighting_div_obj <- make_div_obj(aqua_family_phy, ~Lighting, cores = 4)
lighting_div_res <- run_div_test(aqua_family_phy, lighting_div_obj)
```
## Not Sig (p = 0.208)
```
With 'Artificial' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.9169832 0.1190661 < 1e-8
predictors_Natural 0.1772417 0.1407935 0.208
```
## Plotting
```
plot_DivNet_diversity(aqua_family_phy, lighting_div_obj)
```
<a href="https://i.imgur.com/C4LXsD9.png"><img src="https://i.imgur.com/C4LXsD9.png"></a>
---
# Based on plants
```
plants_div_obj <- make_div_obj(aqua_family_phy, ~Plants, cores = 4)
plants_div_res <- run_div_test(aqua_phy, plants_div_obj)
```
## Tomatos lower diversity than Peas (0.068)
```
With 'Basil' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.0236533 0.1563061 < 1e-8
predictors_Peas 0.1597980 0.2672525 0.55
predictors_Tomatoes -0.3516465 0.2807357 0.21
```
```
plants_div_res_peas_baseline <- run_div_test(aqua_phy, plants_div_obj, target_ref = "Peas")
```
```
With 'Peas' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.1834513 0.1563061 < 1e-8
predictors_Basil -0.1597980 0.2649870 0.546
predictors_Tomatoes -0.5114445 0.2807357 0.068
```
## Plotting
```
plot_DivNet_diversity(aqua_family_phy, plants_div_obj)
```
<a href="https://i.imgur.com/944FNda.png"><img src="https://i.imgur.com/944FNda.png"></a>
---
# Based on water
```
water_div_obj <- make_div_obj(aqua_family_phy, ~Water_circulation, cores = 4)
water_div_res <- run_div_test(aqua_family_phy, water_div_obj)
```
## Not Sig (0.26)
```With 'Pumps' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.1727239 0.1948949 < 1e-8
predictors_Wicks -0.3268373 0.2880062 0.256
```
## Plotting
```
plot_DivNet_diversity(aqua_family_phy, water_div_obj)
```
<a href="https://i.imgur.com/VZezc88.png"><img src="https://i.imgur.com/VZezc88.png"></a>
---
# Based on nitrifying components
```
N_div_obj <- make_div_obj(aqua_family_phy, ~Nitrifying_components, cores = 4)
N_div_res <- run_div_test(aqua_family_phy, N_div_obj)
```
## Not Sig (0.172)
```
With 'Collected_gravel' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.775672 0.2106208 < 1e-8
predictors_Commercial 0.347640 0.2547854 0.172
```
## Plotting
```
plot_DivNet_diversity(aqua_family_phy, N_div_obj)
```
<a href="https://i.imgur.com/QXejwuE.png"><img src="https://i.imgur.com/QXejwuE.png"></a>
---
# Splitting plants
```
aqua_peas_family_phy <- subset_samples(aqua_family_phy, Plants == "Peas")
aqua_basil_family_phy <- subset_samples(aqua_family_phy, Plants == "Basil")
aqua_tomatoes_family_phy <- subset_samples(aqua_family_phy, Plants == "Tomatoes")
```
## Peas
### Timepoint (not sig: p = 0.45)
```
peas_time_point_div_obj <- make_div_obj(aqua_peas_family_phy, ~Time_point, cores = 4)
peas_time_point_T1_base_div_res <- run_div_test(aqua_peas_family_phy, peas_time_point_div_obj)
```
```
With 'T1' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.0053968 0.2185220 < 1e-8
predictors_T2 0.2398329 0.3193968 0.453
predictors_T3 -0.2985799 0.4024406 0.458
```
```
peas_time_point_T2_base_div_res <- run_div_test(aqua_peas_family_phy, peas_time_point_div_obj, target_ref = "T2")
```
```
With 'T2' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.2452297 0.2185220 < 1e-8
predictors_T1 -0.2398329 0.4488045 0.593
predictors_T3 -0.5384127 0.4024406 0.181
```
### Plotting
```
plot_DivNet_diversity(aqua_peas_family_phy, peas_time_point_div_obj)
```
<a href="https://i.imgur.com/b5fyCwo.png"><img src="https://i.imgur.com/b5fyCwo.png"></a>
---
### Lighting (not sig: p = 0.555)
```
peas_lighting_div_obj <- make_div_obj(aqua_peas_family_phy, ~Lighting, cores = 4)
peas_lighting_div_res <- run_div_test(aqua_peas_family_phy, peas_lighting_div_obj)
```
```
With 'Artificial' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.0981946 0.2239162 < 1e-8
predictors_Natural 0.1983297 0.3363078 0.555
```
### Plotting
```
plot_DivNet_diversity(aqua_peas_family_phy, peas_lighting_div_obj)
```
<a href="https://i.imgur.com/gNJwu5t.png"><img src="https://i.imgur.com/gNJwu5t.png"></a>
---
### Water (not sig: p = 0.304)
```
peas_water_div_obj <- make_div_obj(aqua_peas_family_phy, ~Water_circulation, cores = 4)
peas_water_div_res <- run_div_test(aqua_peas_family_phy, peas_water_div_obj)
```
```
With 'Pumps' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.2475024 0.2249006 < 1e-8
predictors_Wicks -0.3565429 0.3470048 0.304
```
### Plotting
```
plot_DivNet_diversity(aqua_peas_family_phy, peas_water_div_obj)
```
<a href="https://i.imgur.com/dD34bvX.png"><img src="https://i.imgur.com/dD34bvX.png"></a>
---
### Nitrifying components (not sig: p = 0.159)
```
peas_N_div_obj <- make_div_obj(aqua_peas_family_phy, ~Nitrifying_components, cores = 4)
peas_N_div_res <- run_div_test(aqua_peas_family_phy, peas_N_div_obj)
```
```
With 'Collected_gravel' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.8891087 0.2149975 < 1e-8
predictors_Commercial 0.3563273 0.2528028 0.159
```
### Plotting
```
plot_DivNet_diversity(aqua_peas_family_phy, peas_water_div_obj)
```
<a href="https://i.imgur.com/awDr1eQ.png"><img src="https://i.imgur.com/awDr1eQ.png"></a>
---
## Basil
### Timepoint (not sig: p = 0.239)
```
basil_time_point_div_obj <- make_div_obj(aqua_basil_family_phy, ~Time_point, cores = 4)
basil_time_point_T1_base_div_res <- run_div_test(aqua_basil_family_phy, basil_time_point_div_obj)
```
```
With 'T1' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.79147025 0.1281569 < 1e-8
predictors_T2 0.25188425 0.4396179 0.567
predictors_T3 0.05699726 0.1654831 0.731
```
```
basil_time_point_T2_base_div_res <- run_div_test(aqua_basil_family_phy, basil_time_point_div_obj, target_ref = "T2")
```
```
Estimates Standard.Errors p.values
(Intercept) 3.0433545 0.1281569 < 1e-8
predictors_T1 -0.2518843 0.2282494 0.27
predictors_T3 -0.1948870 0.1654831 0.239
```
### Plotting
```
plot_DivNet_diversity(aqua_basil_family_phy, basil_time_point_div_obj)
```
<a href="https://i.imgur.com/1zpPvFg.png"><img src="https://i.imgur.com/1zpPvFg.png"></a>
---
### Lighting (not sig: p = 0.166)
```
basil_lighting_div_obj <- make_div_obj(aqua_basil_family_phy, ~Lighting, cores = 4)
basil_lighting_div_res <- run_div_test(aqua_basil_family_phy, basil_lighting_div_obj)
```
```
With 'Artificial' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.7769250 0.1732599 < 1e-8
predictors_Natural 0.2877119 0.2078168 0.166
```
### Plotting
```
plot_DivNet_diversity(aqua_basil_family_phy, basil_lighting_div_obj)
```
<a href="https://i.imgur.com/Gy97pUP.png"><img src="https://i.imgur.com/Gy97pUP.png"></a>
---
### Water (not sig: p = 0.288)
```
basil_water_div_obj <- make_div_obj(aqua_basil_family_phy, ~Water_circulation, cores = 4)
basil_water_div_res <- run_div_test(aqua_basil_family_phy, basil_water_div_obj)
```
```
With 'Pumps' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 3.0317425 0.115106 < 1e-8
predictors_Wicks -0.1771017 0.166659 0.288
```
### Plotting
```
plot_DivNet_diversity(aqua_basil_family_phy, basil_water_div_obj)
```
<a href="https://i.imgur.com/ZKDzqEt.png"><img src="https://i.imgur.com/ZKDzqEt.png"></a>
---
### Nitrifying components (not sig: p = 0.170)
```
basil_N_div_obj <- make_div_obj(aqua_basil_family_phy, ~Nitrifying_components, cores = 4)
basil_N_div_res <- run_div_test(aqua_basil_family_phy, basil_N_div_obj)
```
```
With 'Collected_gravel' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.7015675 0.2525301 < 1e-8
predictors_Commercial 0.3786621 0.2756966 0.17
```
### Plotting
```
plot_DivNet_diversity(aqua_basil_family_phy, basil_water_div_obj)
```
<a href="https://i.imgur.com/RWbrAWV.png"><img src="https://i.imgur.com/RWbrAWV.png"></a>
---
## Tomatoes
### Timepoint (not sig: p = 0.146)
```
tomatoes_time_point_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Time_point, cores = 4)
tomatoes_time_point_T1_base_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj)
```
```
With 'T1' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.9060773 0.2620180 < 1e-8
predictors_T2 -0.0650556 0.4363785 0.881
predictors_T3 -0.7539750 0.5183790 0.146
```
```
tomatoes_time_point_T2_base_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj, target_ref = "T2")
```
```
With 'T2' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.8410217 0.262018 < 1e-8
predictors_T1 0.0650556 0.422836 0.878
predictors_T3 -0.6889194 0.518379 0.184
```
### Plotting
```
plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj)
```
<a href="https://i.imgur.com/f3Qx8XU.png"><img src="https://i.imgur.com/f3Qx8XU.png"></a>
---
### Lighting (not sig: p = 0.329)
```
tomatoes_lighting_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Lighting, cores = 4)
tomatoes_lighting_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_lighting_div_obj)
```
```
With 'Artificial' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.7193033 0.1493807 < 1e-8
predictors_Natural -0.2234033 0.2288393 0.329
```
### Plotting
```
plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_lighting_div_obj)
```
<a href="https://i.imgur.com/X8tUhGx.png"><img src="https://i.imgur.com/X8tUhGx.png"></a>
---
### Water (not sig: p = 0.878)
```
tomatoes_water_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Water_circulation, cores = 4)
tomatoes_water_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_water_div_obj)
```
```
With 'Pumps' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.68439913 0.2048484 < 1e-8
predictors_Wicks 0.06047993 0.3926903 0.878
```
### Plotting
```
plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_water_div_obj)
```
<a href="https://i.imgur.com/XyrNsOE.png"><img src="https://i.imgur.com/XyrNsOE.png"></a>
---
### Nitrifying components (not sig: p = 0.293)
```
tomatoes_N_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Nitrifying_components, cores = 4)
tomatoes_N_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_N_div_obj)
```
```
With 'Collected_gravel' as the baseline:
Estimates Standard.Errors p.values
(Intercept) 2.493579 0.1500111 < 1e-8
predictors_Commercial 0.225985 0.2147059 0.293
```
### Plotting
```
plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_water_div_obj)
```
<a href="https://i.imgur.com/KXqOmYY.png"><img src="https://i.imgur.com/KXqOmYY.png"></a>
---