Diversity biz (26-May-2021)

--- tags: Aquaponics title: Diversity biz (26-May-2021) --- # Diversity biz # Summary --- > **Nothing significantly different based on estimated shannon diversity at the family-level taxonomic classifications we have (that's the lowest taxonomic resolution we have).** --- [toc] # Setting up ```R library(phyloseq) library(DivNet) library(tidyverse) library(magrittr) theme_set(theme_bw) # helper functions for mass running things make_div_obj <- function(phy_obj, wanted_formula, cores = 1) { # running divnet div_obj <- phy_obj %>% divnet(formula = wanted_formula, ncores = cores) return(div_obj) } run_div_test <- function(phy_obj, div_obj, target_ref = NULL) { # getting covariate target_covariate <- attributes(div_obj$X)$contrasts %>% names # getting single estimate and se for each treatment to give to betta function # getting sample info tab alone sample_tab <- phy_obj %>% sample_data %>% data.frame # getting unique treatment types unique_treatment_types <- sample_tab %>% pull(target_covariate) %>% unique %>% as.character unique_samples_needed <- c() for ( type in unique_treatment_types ) { sample_to_add <- sample_tab %>% filter(!!as.symbol(target_covariate) == !!type) %>% head(1) %>% row.names() unique_samples_needed <- c(unique_samples_needed, sample_to_add) } # subsetting divnet object to one per unique type sub_div_obj_shannon <- div_obj$shannon[unique_samples_needed] sub_div_obj_shannon_variance <- div_obj$`shannon-variance`[unique_samples_needed] # getting estimates and standard errors for one entry for each type estimates <- c() ses <- c() for ( i in 1:length(unique_samples_needed) ) { estimates <- c(estimates, unlist(sub_div_obj_shannon[i])[1] %>% as.numeric()) ses <- c(ses, sqrt(sub_div_obj_shannon_variance[i])) } # making subset sample tab sub_sample_tab <- sample_tab[unique_samples_needed, ] # setting up matrix predictors_ <- sub_sample_tab[, names(sub_sample_tab) %in% target_covariate] # there is no built-in way to change the baseline in the contrasts, so modifying the factor levels as suggested on divnet issue here: https://github.com/adw96/DivNet/issues/37 if ( is.null(target_ref) ) { X <- model.matrix(~predictors_) baseline <- predictors_ %>% levels %>% head(1) } else { predictors_ <- predictors_ %>% relevel(ref = target_ref) baseline <- predictors_ %>% levels %>% head(1) X <- model.matrix(~predictors_) } res <- betta(estimates, ses, X) cat("With '", baseline, "' as the baseline:\n\n", sep = "") tab <- res$table %>% data.frame # p-values are calculated as: 2*(1-pnorm(abs(Estimate/SE))) (see betta function) # I think getting 0s is just due to how many digits R is working with. It seems # pnorm(5.32672) = 0.9999999 and pnorm(5.32673) = 1. So if our abs(Estimate/SE) # is greater than 5.32672, we’re going to get a p-value of 0. So setting 0's to # < 1e-8 for what is printed out new_p_vec <- vector() for ( value in tab$p.values ) { if ( value == 0 ) { new_p_vec <- c(new_p_vec, "< 1e-8") } else { new_p_vec <- c(new_p_vec, value) } } tab$p.values <- new_p_vec print(tab) return(res) } plot_DivNet_diversity <- function(phy_obj, div_obj, rank = "Family") { # getting covariate target_covariate <- attributes(div_obj$X)$contrasts %>% names # getting sample table from phyloseq object sample_tab <- phy_obj %>% sample_data %>% data.frame # getting unique treatment types unique_treatment_types <- sample_tab %>% pull(target_covariate) %>% unique %>% as.character unique_samples_needed <- c() for ( type in unique_treatment_types ) { sample_to_add <- sample_tab %>% filter(!!as.symbol(target_covariate) == type) %>% head(1) %>% row.names() unique_samples_needed <- c(unique_samples_needed, sample_to_add) } sub_div_obj_shannon <- div_obj$shannon[c(unique_samples_needed)] div_est_vec <- c() ci_vec <- c() for ( i in 1:length(unique_samples_needed) ) { div_est_vec <- c(div_est_vec, unlist(sub_div_obj_shannon[i])[1] %>% as.numeric()) ci_vec <- c(ci_vec, unlist(sub_div_obj_shannon[i])[2] %>% as.numeric() * 2) } sub_plot_tab <- data.frame(target_covariate = unique_treatment_types, shannon = div_est_vec, ci = ci_vec) # getting max value for plot y-axis # max_y <- max(sub_plot_tab$shannon + sub_plot_tab$ci) %>% ceiling y_axis_label = paste0("Shannon diversity estimate\n(", rank, " level)") plot <- ggplot(sub_plot_tab, aes(x = target_covariate, y = shannon, color = target_covariate)) + geom_errorbar(aes(ymin = shannon - ci, ymax = shannon + ci), width = 0) + geom_point() + theme_bw() + xlab(target_covariate) + ylab(y_axis_label) + theme(legend.position = "none") # + coord_cartesian(ylim = c(0,max_y)) return(plot) } ``` ``` # reading in our data (excluding EM1 and Lambda) otu_counts_tab <- read.table("combined_otu_counts_tab.txt", header=TRUE, row.names=1, sep="\t")[,c(1:30)] taxonomy_tab <- read.table("combined_taxonomy_tab.txt", header=TRUE, row.names=1, sep="\t", na.strings="<NA>") sample_info_tab <- read.table("sample_info_tab.txt", header=T, check.names=F, row.names=1, sep="\t")[c(1:30),] # making phyloseq object counts_phy <- otu_table(otu_counts_tab, taxa_are_rows=TRUE) tax_phy <- tax_table(as.matrix(taxonomy_tab)) sample_phy <- sample_data(sample_info_tab) aqua_phy <- phyloseq(counts_phy, tax_phy, sample_phy) # grouping at family rank aqua_family_phy <- tax_glom(aqua_phy, taxrank = "Family") ``` # Based on timepoint ``` time_point_div_obj <- make_div_obj(aqua_family_phy, ~Time_point, cores = 4) time_point_T1_base_div_res <- run_div_test(aqua_family_phy, time_point_div_obj) ``` ## Not Sig (p = 0.307) ``` With 'T1' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.07915048 0.1057571 < 1e-8 predictors_T2 0.01151054 0.1735481 0.947 predictors_T3 -0.23486316 0.2413842 0.331 ``` ``` time_point_T2_base_div_res <- run_div_test(aqua_family_phy, time_point_div_obj, target_ref = "T2") ``` ``` With 'T2' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.09066102 0.1057571 < 1e-8 predictors_T1 -0.01151054 0.1600364 0.943 predictors_T3 -0.24637370 0.2413842 0.307 ``` ## Plotting ``` plot_DivNet_diversity(aqua_family_phy, time_point_div_obj) ``` <a href="https://i.imgur.com/1HVkXIv.png"><img src="https://i.imgur.com/1HVkXIv.png"></a> --- # Based on lighting ``` lighting_div_obj <- make_div_obj(aqua_family_phy, ~Lighting, cores = 4) lighting_div_res <- run_div_test(aqua_family_phy, lighting_div_obj) ``` ## Not Sig (p = 0.208) ``` With 'Artificial' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.9169832 0.1190661 < 1e-8 predictors_Natural 0.1772417 0.1407935 0.208 ``` ## Plotting ``` plot_DivNet_diversity(aqua_family_phy, lighting_div_obj) ``` <a href="https://i.imgur.com/C4LXsD9.png"><img src="https://i.imgur.com/C4LXsD9.png"></a> --- # Based on plants ``` plants_div_obj <- make_div_obj(aqua_family_phy, ~Plants, cores = 4) plants_div_res <- run_div_test(aqua_phy, plants_div_obj) ``` ## Tomatos lower diversity than Peas (0.068) ``` With 'Basil' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.0236533 0.1563061 < 1e-8 predictors_Peas 0.1597980 0.2672525 0.55 predictors_Tomatoes -0.3516465 0.2807357 0.21 ``` ``` plants_div_res_peas_baseline <- run_div_test(aqua_phy, plants_div_obj, target_ref = "Peas") ``` ``` With 'Peas' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.1834513 0.1563061 < 1e-8 predictors_Basil -0.1597980 0.2649870 0.546 predictors_Tomatoes -0.5114445 0.2807357 0.068 ``` ## Plotting ``` plot_DivNet_diversity(aqua_family_phy, plants_div_obj) ``` <a href="https://i.imgur.com/944FNda.png"><img src="https://i.imgur.com/944FNda.png"></a> --- # Based on water ``` water_div_obj <- make_div_obj(aqua_family_phy, ~Water_circulation, cores = 4) water_div_res <- run_div_test(aqua_family_phy, water_div_obj) ``` ## Not Sig (0.26) ```With 'Pumps' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.1727239 0.1948949 < 1e-8 predictors_Wicks -0.3268373 0.2880062 0.256 ``` ## Plotting ``` plot_DivNet_diversity(aqua_family_phy, water_div_obj) ``` <a href="https://i.imgur.com/VZezc88.png"><img src="https://i.imgur.com/VZezc88.png"></a> --- # Based on nitrifying components ``` N_div_obj <- make_div_obj(aqua_family_phy, ~Nitrifying_components, cores = 4) N_div_res <- run_div_test(aqua_family_phy, N_div_obj) ``` ## Not Sig (0.172) ``` With 'Collected_gravel' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.775672 0.2106208 < 1e-8 predictors_Commercial 0.347640 0.2547854 0.172 ``` ## Plotting ``` plot_DivNet_diversity(aqua_family_phy, N_div_obj) ``` <a href="https://i.imgur.com/QXejwuE.png"><img src="https://i.imgur.com/QXejwuE.png"></a> --- # Splitting plants ``` aqua_peas_family_phy <- subset_samples(aqua_family_phy, Plants == "Peas") aqua_basil_family_phy <- subset_samples(aqua_family_phy, Plants == "Basil") aqua_tomatoes_family_phy <- subset_samples(aqua_family_phy, Plants == "Tomatoes") ``` ## Peas ### Timepoint (not sig: p = 0.45) ``` peas_time_point_div_obj <- make_div_obj(aqua_peas_family_phy, ~Time_point, cores = 4) peas_time_point_T1_base_div_res <- run_div_test(aqua_peas_family_phy, peas_time_point_div_obj) ``` ``` With 'T1' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.0053968 0.2185220 < 1e-8 predictors_T2 0.2398329 0.3193968 0.453 predictors_T3 -0.2985799 0.4024406 0.458 ``` ``` peas_time_point_T2_base_div_res <- run_div_test(aqua_peas_family_phy, peas_time_point_div_obj, target_ref = "T2") ``` ``` With 'T2' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.2452297 0.2185220 < 1e-8 predictors_T1 -0.2398329 0.4488045 0.593 predictors_T3 -0.5384127 0.4024406 0.181 ``` ### Plotting ``` plot_DivNet_diversity(aqua_peas_family_phy, peas_time_point_div_obj) ``` <a href="https://i.imgur.com/b5fyCwo.png"><img src="https://i.imgur.com/b5fyCwo.png"></a> --- ### Lighting (not sig: p = 0.555) ``` peas_lighting_div_obj <- make_div_obj(aqua_peas_family_phy, ~Lighting, cores = 4) peas_lighting_div_res <- run_div_test(aqua_peas_family_phy, peas_lighting_div_obj) ``` ``` With 'Artificial' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.0981946 0.2239162 < 1e-8 predictors_Natural 0.1983297 0.3363078 0.555 ``` ### Plotting ``` plot_DivNet_diversity(aqua_peas_family_phy, peas_lighting_div_obj) ``` <a href="https://i.imgur.com/gNJwu5t.png"><img src="https://i.imgur.com/gNJwu5t.png"></a> --- ### Water (not sig: p = 0.304) ``` peas_water_div_obj <- make_div_obj(aqua_peas_family_phy, ~Water_circulation, cores = 4) peas_water_div_res <- run_div_test(aqua_peas_family_phy, peas_water_div_obj) ``` ``` With 'Pumps' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.2475024 0.2249006 < 1e-8 predictors_Wicks -0.3565429 0.3470048 0.304 ``` ### Plotting ``` plot_DivNet_diversity(aqua_peas_family_phy, peas_water_div_obj) ``` <a href="https://i.imgur.com/dD34bvX.png"><img src="https://i.imgur.com/dD34bvX.png"></a> --- ### Nitrifying components (not sig: p = 0.159) ``` peas_N_div_obj <- make_div_obj(aqua_peas_family_phy, ~Nitrifying_components, cores = 4) peas_N_div_res <- run_div_test(aqua_peas_family_phy, peas_N_div_obj) ``` ``` With 'Collected_gravel' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.8891087 0.2149975 < 1e-8 predictors_Commercial 0.3563273 0.2528028 0.159 ``` ### Plotting ``` plot_DivNet_diversity(aqua_peas_family_phy, peas_water_div_obj) ``` <a href="https://i.imgur.com/awDr1eQ.png"><img src="https://i.imgur.com/awDr1eQ.png"></a> --- ## Basil ### Timepoint (not sig: p = 0.239) ``` basil_time_point_div_obj <- make_div_obj(aqua_basil_family_phy, ~Time_point, cores = 4) basil_time_point_T1_base_div_res <- run_div_test(aqua_basil_family_phy, basil_time_point_div_obj) ``` ``` With 'T1' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.79147025 0.1281569 < 1e-8 predictors_T2 0.25188425 0.4396179 0.567 predictors_T3 0.05699726 0.1654831 0.731 ``` ``` basil_time_point_T2_base_div_res <- run_div_test(aqua_basil_family_phy, basil_time_point_div_obj, target_ref = "T2") ``` ``` Estimates Standard.Errors p.values (Intercept) 3.0433545 0.1281569 < 1e-8 predictors_T1 -0.2518843 0.2282494 0.27 predictors_T3 -0.1948870 0.1654831 0.239 ``` ### Plotting ``` plot_DivNet_diversity(aqua_basil_family_phy, basil_time_point_div_obj) ``` <a href="https://i.imgur.com/1zpPvFg.png"><img src="https://i.imgur.com/1zpPvFg.png"></a> --- ### Lighting (not sig: p = 0.166) ``` basil_lighting_div_obj <- make_div_obj(aqua_basil_family_phy, ~Lighting, cores = 4) basil_lighting_div_res <- run_div_test(aqua_basil_family_phy, basil_lighting_div_obj) ``` ``` With 'Artificial' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.7769250 0.1732599 < 1e-8 predictors_Natural 0.2877119 0.2078168 0.166 ``` ### Plotting ``` plot_DivNet_diversity(aqua_basil_family_phy, basil_lighting_div_obj) ``` <a href="https://i.imgur.com/Gy97pUP.png"><img src="https://i.imgur.com/Gy97pUP.png"></a> --- ### Water (not sig: p = 0.288) ``` basil_water_div_obj <- make_div_obj(aqua_basil_family_phy, ~Water_circulation, cores = 4) basil_water_div_res <- run_div_test(aqua_basil_family_phy, basil_water_div_obj) ``` ``` With 'Pumps' as the baseline: Estimates Standard.Errors p.values (Intercept) 3.0317425 0.115106 < 1e-8 predictors_Wicks -0.1771017 0.166659 0.288 ``` ### Plotting ``` plot_DivNet_diversity(aqua_basil_family_phy, basil_water_div_obj) ``` <a href="https://i.imgur.com/ZKDzqEt.png"><img src="https://i.imgur.com/ZKDzqEt.png"></a> --- ### Nitrifying components (not sig: p = 0.170) ``` basil_N_div_obj <- make_div_obj(aqua_basil_family_phy, ~Nitrifying_components, cores = 4) basil_N_div_res <- run_div_test(aqua_basil_family_phy, basil_N_div_obj) ``` ``` With 'Collected_gravel' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.7015675 0.2525301 < 1e-8 predictors_Commercial 0.3786621 0.2756966 0.17 ``` ### Plotting ``` plot_DivNet_diversity(aqua_basil_family_phy, basil_water_div_obj) ``` <a href="https://i.imgur.com/RWbrAWV.png"><img src="https://i.imgur.com/RWbrAWV.png"></a> --- ## Tomatoes ### Timepoint (not sig: p = 0.146) ``` tomatoes_time_point_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Time_point, cores = 4) tomatoes_time_point_T1_base_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj) ``` ``` With 'T1' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.9060773 0.2620180 < 1e-8 predictors_T2 -0.0650556 0.4363785 0.881 predictors_T3 -0.7539750 0.5183790 0.146 ``` ``` tomatoes_time_point_T2_base_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj, target_ref = "T2") ``` ``` With 'T2' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.8410217 0.262018 < 1e-8 predictors_T1 0.0650556 0.422836 0.878 predictors_T3 -0.6889194 0.518379 0.184 ``` ### Plotting ``` plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_time_point_div_obj) ``` <a href="https://i.imgur.com/f3Qx8XU.png"><img src="https://i.imgur.com/f3Qx8XU.png"></a> --- ### Lighting (not sig: p = 0.329) ``` tomatoes_lighting_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Lighting, cores = 4) tomatoes_lighting_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_lighting_div_obj) ``` ``` With 'Artificial' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.7193033 0.1493807 < 1e-8 predictors_Natural -0.2234033 0.2288393 0.329 ``` ### Plotting ``` plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_lighting_div_obj) ``` <a href="https://i.imgur.com/X8tUhGx.png"><img src="https://i.imgur.com/X8tUhGx.png"></a> --- ### Water (not sig: p = 0.878) ``` tomatoes_water_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Water_circulation, cores = 4) tomatoes_water_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_water_div_obj) ``` ``` With 'Pumps' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.68439913 0.2048484 < 1e-8 predictors_Wicks 0.06047993 0.3926903 0.878 ``` ### Plotting ``` plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_water_div_obj) ``` <a href="https://i.imgur.com/XyrNsOE.png"><img src="https://i.imgur.com/XyrNsOE.png"></a> --- ### Nitrifying components (not sig: p = 0.293) ``` tomatoes_N_div_obj <- make_div_obj(aqua_tomatoes_family_phy, ~Nitrifying_components, cores = 4) tomatoes_N_div_res <- run_div_test(aqua_tomatoes_family_phy, tomatoes_N_div_obj) ``` ``` With 'Collected_gravel' as the baseline: Estimates Standard.Errors p.values (Intercept) 2.493579 0.1500111 < 1e-8 predictors_Commercial 0.225985 0.2147059 0.293 ``` ### Plotting ``` plot_DivNet_diversity(aqua_tomatoes_family_phy, tomatoes_water_div_obj) ``` <a href="https://i.imgur.com/KXqOmYY.png"><img src="https://i.imgur.com/KXqOmYY.png"></a> ---