ANOVA - HackMD

--- tags: Rscripts --- # ANOVA ### Author: Jocelyn P. Colella - Almost all comparisons are SIGNIFICANT at p < 0.05 AFTER Tukey's correction for multiple hits - NOT SIGNFICANT: - RQ: all males BL vs. Hot (unlogged data only) - H2O: all females BL vs. Cold (both logged and unlogged) - VCO2: daytime females BL vs. Hot (both logged and unlogged) - VO2: nighttime males BL vs. Cold (both logged and unlogged) - ges (reported below) = generalized eta squared (effect size). - ges measures the proportion of the variability in the dependent variable that can be explained in terms of the independent variable (here, treatment group [BL, hot, cold]) | data | EE | RQ | H2Omg | VO2 | VCO2| | -------- | -------- | -------- |----|-----|----| |all_F| 0.181| 0.074| 0.131| 0.174| 0.205| |all_day_F| 0.447| 0.275| 0.119| 0.451| 0.423| |all_night_F| 0.191| 0.078| 0.437| 0.181| 0.227| |all_M| 0.194| 0.03| 0.021| 0.195| 0.19| |all_day_M| 0.529| 0.173| 0.258| 0.533| 0.501| |all_night_M| 0.249| 0.188| 0.309| 0.255|0.222| ``` ### ANOVA ### ASSUMPTIONS # 1 - Independence of observations (**VIOLATED for hot/cold conditions) # 2 - No significant outiers (removed prior to analysis) # 3 - Normality (violated) # 4 - Homogeneity of variances (violated) # Check out the data all_noOL_F %>% sample_n_by(experiment, size = 1) levels(as.factor(all_noOL_F$experiment)) # Summary stats all_noOL_F %>% group_by(experiment) %>% get_summary_stats(EE, type = "mean_sd") ggboxplot(all_noOL_F, x = "experiment", y = "EE") ### ASSUMPTION 2 - Detect outliers all_noOL_F %>% group_by(experiment) %>% identify_outliers(EE) #NOTE: there are additional outliers (even though outliers have been removed once). Removing outliers again results in fewer (but still some) outliers ### ASSUMPTION 3 - Normality allexp_df_list = c("all_noOL_F", "all_day_noOL_F", "all_night_noOL_F", "all_noOL_M", "all_day_noOL_M", "all_night_noOL_M") #List of all LOGGED dataframes l_allexp_df_list = c("l_all_noOL_F", "l_all_day_noOL_F", "l_all_night_noOL_F", "l_all_noOL_M", "l_all_day_noOL_M", "l_all_night_noOL_M") # Use shapiro-wilks test to test for normality in LOGGED and UNLOGGED datasets (with only 1st round of outliers removed) for(i in l_allexp_df_list){ print(i) # Create model model <- lm(VCO2 ~ experiment, data = get(i)) ggqqplot(residuals(model)) model.metrics <- augment(model) %>% select(-.hat, -.sigma, -.fitted, -.se.fit) if (dim(model.metrics)[1] < 5000){ print(shapiro_test(model.metrics$.resid)) } else { subsamp <- model.metrics[sample(nrow(model.metrics), 4999), ] #shapiro_test can only examine <5000 samples, so randomly downsample print(shapiro_test(subsamp$.resid)) } } ### Significant p values = ASSUMPTION VIOLATION - residuals nonnormal ### EE - violated for both logged and unlogged datasets ### RQ - violated for both logged and unlogged datasets ### H2O - violated for both logged and unlogged datasets ### VO2 - violated for both logged and unlogged datasets ### VCO2 - violated for both logged and unlogged datasets #Make qqplots for all logged datasets for(i in l_allexp_df_list){ print(i) ggqqplot(get(i), "RQ", facet.by = "experiment") } ### ASSUMPTION 4 - Homogeneity of variance # Levene's test: Run for each DV and logged/unlogged datasets for(i in l_allexp_df_list){ print(i) model <- lm(VO2 ~ experiment, data = get(i)) model.metrics <- augment(model) %>% select(-.hat, -.sigma, -.fitted, -.se.fit) print(model.metrics %>% levene_test(.resid ~ model.metrics$experiment)) } # EE - all but one comparison was signifcant # unlogged all_night_noOL_M = NOT signif (p = 0.175) # RQ - all signif for both logged and unlogged datasets # H2O - all signif for both logged and unlogged datasets # VO2 - all signif for both logged and unlogged datasets # VCO2 - all but one comparison was significnat # unlogged all_night_noOL_M = NOT signif (p = 0.06) ############### #### ANOVA #### # Run for each DV for(i in allexp_df_list){ print(i) print(get(i) %>% anova_test(VCO2 ~ experiment)) } # Everything significant for both logged and unlogged data # using Tukey post-hoc tst for(i in allexp_df_list){ print(i) print(get(i) %>% tukey_hsd(RQ ~ experiment)) #print(get_emmeans(get(i) %>% emmeans_test(RQ ~ experiment, covariate = weight, # p.adjust.method = "bonferroni"))) } # EE - all signif for both logged and unlogged datasets # RQ - all but one comparison was signifcant # unlogged all_noOL_M BLvHot not significant # H2O - all but one comparison was signifcant # unlogged and logged all_noOL_F BLvCold not significant # VO2 - all but one comparison was signifcant # unloggged and logged all_night_noOL_M BLvCold not signif # VCO2- all but one comparison was signifcant # unlogged and logged all_day_noOL_F BLvHot not signif ```