---
tags: Rscripts
---

# importing all respo data into R with functions

---
title: "All Analysis and data"
author: "Dani Blumstein"
date: "3/2/2020"
output: html_document
---

bring in libs
```{r}
library(devtools)
library(tidyverse)
library(lubridate)
library(readr)
library(viridis)
library(patchwork)
library(tidyselect)
library(readxl)
```

Important equations:
RQ = CO2 eliminated/O2 consumed
EE = 0.06 * (3.941 * VO2 + 1.106 * VCO2)

Functions:

import data function
```{r}
# Collect the per-cage variables `<prefix>0`, `<prefix>1`, ... into one vector.
# Lookup starts in the caller's frame and walks up the enclosing environments.
# A slot that does not exist, or that does not hold exactly one value, becomes
# NA instead of raising an error.
.cage_slot_values <- function(prefix, slots = 0:6, envir = parent.frame()) {
  found <- mget(
    paste0(prefix, slots),
    envir = envir,
    inherits = TRUE,
    ifnotfound = list(NA)
  )
  unlist(
    lapply(found, function(value) if (length(value) == 1) value else NA),
    use.names = FALSE
  )
}

#' Read one respirometry CSV and annotate every row.
#'
#' Reads `paste0(path, data_file)`; `path` is a variable defined outside this
#' function. Adds EE, RQ, the rounded cage number `animal`, `Sex`, a character
#' `DateTime` ("<StartDate> <StartTime>"), and the per-cage `weight` and
#' `Animal_ID` taken from the variables `cageweight0..6` / `animalID0..6`
#' (weight_ID() assigns these, so call it before this function).
#'
#' @param data_file CSV file name, appended to `path` with no separator.
#' @param Sex Value stored in the `Sex` column of every row.
#' @return The annotated rows whose `animal` is one of 0-7. Cage 7 has no
#'   weight/ID slot, so its `weight` and `Animal_ID` are NA.
bring_in_data <- function(data_file, Sex) {
  raw <- read_csv(
    paste0(path, data_file),
    col_types = cols(
      Animal = col_double(),
      StartDate = col_date(format = "%m/%d/%Y"),
      deltaCO2 = col_double(),
      deltaH2O = col_double(),
      H2Oml = col_double(),
      Deg_C = col_double(),
      VCO2 = col_double(),
      StartTime = col_time(format = "%H:%M:%S")
    )
  )

  # One lookup vector per attribute replaces a seven-deep ifelse() ladder.
  slot_numbers <- 0:6
  slot_weights <- .cage_slot_values("cageweight", slot_numbers)
  slot_ids <- .cage_slot_values("animalID", slot_numbers)

  raw %>%
    mutate(
      EE = 0.06 * (3.941 * VO2 + 1.106 * VCO2),
      RQ = VCO2 / VO2,
      animal = round(Animal, digits = 0),
      Animal = NULL,
      # `!!` forces the function argument; a bare `Sex = Sex` would silently
      # prefer a CSV column called Sex if one existed.
      Sex = !!Sex
    ) %>%
    unite("DateTime", StartDate:StartTime, remove = FALSE, sep = " ") %>%
    mutate(
      weight = slot_weights[match(animal, slot_numbers)],
      Animal_ID = slot_ids[match(animal, slot_numbers)],
      # H2Omg_edit is a numeric copy of H2Omg, NA where StartTime is missing.
      # NOTE(review): presumably a placeholder for masking specific hours -
      # confirm before relying on it being different from H2Omg.
      H2Omg_edit = as.numeric(ifelse(is.na(hour(StartTime)), NA, H2Omg))
    ) %>%
    #metric <- "corEE"
    filter(animal %in% 0:7)
}
```

mouse id and weights function
will add electrolytes....one day
```{r}
#' Publish the mouse IDs and weights of one experiment as per-cage variables.
#'
#' Picks the rows of `electrolyte_data` (defined outside this function) whose
#' `experiment_date` equals `date` and assigns, in the caller's environment,
#' `animalID0`, `animalID1`, ... and `cageweight0`, `cageweight1`, ... in row
#' order. bring_in_data() reads slots 0-6, so at least those seven slots are
#' always written: slots without a matching row are set to NA so values from
#' a previously loaded experiment cannot leak into this one.
#'
#' NOTE(review): slots follow row order, not `cage_number` - confirm the sheet
#' is sorted by cage within each experiment date.
#'
#' @param date Value compared with `electrolyte_data$experiment_date`.
#' @return `NULL`, invisibly; called for its side effect.
weight_ID <- function(date) {
  # Capture the caller's frame before anything else can change the call stack.
  caller_env <- parent.frame()

  wanted_columns <- c(
    "sex", "mouse_ID", "cage_number", "weight", "Na", "K", "Cl", "TCO2",
    "BUN", "Crea", "Glu", "iCa", "AnGap", "Hct", "Hb*"
  )
  subset <- electrolyte_data[
    which(electrolyte_data$experiment_date == date),
    names(electrolyte_data) %in% wanted_columns
  ]
  ids <- subset$mouse_ID
  weights <- as.double(subset$weight)

  if (length(ids) == 0) {
    warning(
      "weight_ID(): no electrolyte_data rows with experiment_date '", date,
      "'; cage IDs and weights are set to NA",
      call. = FALSE
    )
  }

  # Indexing past the end of a vector yields NA, which is what clears the
  # unused slots.
  min_slots <- 7L
  for (slot in seq_len(max(length(ids), min_slots)) - 1L) {
    assign(paste0("animalID", slot), ids[slot + 1L], envir = caller_env)
    assign(paste0("cageweight", slot), weights[slot + 1L], envir = caller_env)
  }
  invisible(NULL)
}
```

merge data and subset for 72 hours function
```{r}
#' Keep the 72-hour analysis window of a run.
#'
#' The window opens 2 hours after the first row's `DateTime` and closes 72
#' hours after that; both ends are inclusive. Rows whose `DateTime` cannot be
#' parsed are dropped.
#'
#' @param cage Data frame with a `DateTime` column holding
#'   "YYYY-MM-DD HH:MM:SS" text, as produced by bring_in_data().
#' @return The rows of `cage` that fall inside the window.
merge_data <- function(cage) {
  stopifnot("DateTime" %in% names(cage))

  # Parse every timestamp with ymd_hms() so both sides of the comparison are
  # in UTC. Comparing the character column directly with a POSIXct makes base
  # R parse the text in the session's local time zone, which shifts the
  # window by the local UTC offset.
  stamps <- ymd_hms(cage[["DateTime"]])
  begin_experiment <- stamps[1] + dhours(2)
  end_time <- begin_experiment + dhours(72)

  in_window <- stamps >= begin_experiment & stamps <= end_time
  cage %>% filter(!!in_window)
}
```

import data electrolyte and weight data and path to files. 
this should be one of the only things edited for now
```{r}
# Root folder of every data file. bring_in_data() and the read_excel() call
# below glue file names straight onto it (sep = ""), so the trailing "/" is
# required.
path <- "~/Box Sync/UNH/metabolic chamber/Cactus_Mouse_Physiology/data/"
# Sheet that weight_ID() filters by experiment_date; cells containing the
# text "NA" are read as missing.
electrolyte_data <- read_excel(paste(path,"electrolyte_data.xlsx",sep=""), na = "NA")
```

males 1 baseline
Feb 26, 2020
```{r}
# weight_ID() must run first: it assigns the animalIDn / cageweightn
# variables that bring_in_data() reads while annotating the rows.
weight_ID("26-Feb-20")
# Read the run, then keep the 72 h window that merge_data() selects.
cages26feb <- merge_data(bring_in_data("26Feb20/feb26.csv", "M"))

# Abandoned experiment with an hour counter, kept for reference only.
# start_time <- ymd_hms(cages26feb[[1]][1])
# cages26feb$h <- 0
# x=0
# for (i in 1:nrow(cages26feb)) {
#   hour <- start_time + dhours(x) <- x
# }
# begin_experiment <- start_time + dhours(0)
# end_time <- begin_experiment + dhours(72)
# filtered <- cage %>% filter(DateTime >= begin_experiment & DateTime <= end_time)
```

males 2 baseline
Mar 10, 2020
```{r}
# NOTE(review): weights are looked up under 5-Mar-20 although the run is
# labelled Mar 10 - confirm this is the intended weighing date.
weight_ID("5-Mar-20")
cages10mar <- merge_data(bring_in_data("10Mar20/mar10.csv", "M"))
```

females 1 baseline
Feb 20, 2020
```{r}
weight_ID("20-Feb-20")
cages20feb <- merge_data(bring_in_data("20Feb20/feb20.csv", "F"))
```

females 2 baseline
March 14, 2020
```{r}
weight_ID("14-Mar-20")
cages14mar <- merge_data(bring_in_data("14Mar20/mar14.csv", "F"))
```

males dehydration test (baseline temp cycle)
April 16, 2020
```{r}
# Loaded here but not part of the baseline / cold / hot sets built further
# down in this file.
weight_ID("16-Apr-20")
cages16april <- merge_data(bring_in_data("16April20/16_april_2020_day3.csv", "M"))
```

females1 constant 70 deg temp
April 20, 2020
```{r}
# The run is split over four CSV files; each is read separately.
# weight_ID() is repeated with the same date before every read.
weight_ID("20-Apr-20")
cages20april1 <- bring_in_data("20April20/20_april_2020_1.csv", "F")

weight_ID("20-Apr-20")
cages20april2 <- bring_in_data("20April20/20_april_2020_2.csv", "F")

weight_ID("20-Apr-20")
cages20april3 <- bring_in_data("20April20/20_april_2020_3.csv", "F")

weight_ID("20-Apr-20")
cages20april4 <- bring_in_data("20April20/20_april_2020_4.csv", "F")

# All four files, unfiltered.
cages20april_all <- rbind(cages20april1, cages20april2, cages20april3, cages20april4)

# Analysis set: files 2-4 only (file 1 is left out), cut to the 72 h window.
cages20april <- merge_data(rbind(cages20april2, cages20april3, cages20april4))

# Chamber temperature over time, as a check on the retained window.
ggplot(cages20april, aes(as.POSIXct(with(cages20april, StartDate + hms(StartTime))),y=Deg_C))+
  geom_line()
```

males1 constant 70 deg temp
April 27, 2020
```{r}
# Three CSV files for this run; weight_ID() is repeated with the same date
# before every read so bring_in_data() sees this run's IDs and weights.
weight_ID("27-Apr-20")
cages27april1 <- bring_in_data("27April20/27_april_2020_1.csv", "M")

weight_ID("27-Apr-20")
cages27april2 <- bring_in_data("27April20/27_april_2020_2.csv", "M")

weight_ID("27-Apr-20")
cages27april3 <- bring_in_data("27April20/27_april_2020_3.csv", "M")

# All three files are kept, then cut to the 72 h window.
cages27april <- merge_data(rbind(cages27april1, cages27april2, cages27april3))

# Chamber temperature over time for the retained window.
ggplot(cages27april, aes(as.POSIXct(with(cages27april, StartDate + hms(StartTime))),y=Deg_C))+
  geom_line()
```

females2 constant 70 deg temp
April 30, 2020
```{r}
# Six CSV files; files 2 and 3 carry "wrong_temp" in their names.
weight_ID("30-Apr-20")
cages30april1 <- bring_in_data("30April20/30_april_2020_1.csv", "F")

weight_ID("30-Apr-20")
cages30april2 <- bring_in_data("30April20/30_april_2020_2_wrong_temp.csv", "F")

weight_ID("30-Apr-20")
cages30april3 <- bring_in_data("30April20/30_april_2020_3_wrong_temp.csv", "F")

weight_ID("30-Apr-20")
cages30april4 <- bring_in_data("30April20/30_april_2020_4.csv", "F")

weight_ID("30-Apr-20")
cages30april5 <- bring_in_data("30April20/30_april_2020_5.csv", "F")

weight_ID("30-Apr-20")
cages30april6 <- bring_in_data("30April20/30_april_2020_6.csv", "F")

# Everything that was recorded, unfiltered.
cages30april_all <- rbind(cages30april1, cages30april2, cages30april3, cages30april4, cages30april5, cages30april6)

# Analysis set: files 4-6 only, cut to the 72 h window.
cages30april <- merge_data(rbind(cages30april4, cages30april5, cages30april6))

ggplot(cages30april, aes(as.POSIXct(with(cages30april, StartDate + hms(StartTime))),y=Deg_C))+
  geom_line()
```

males2 constant 70 deg temp
May 7, 2020
temp spikes during collection
```{r}
# Whole run disabled; nothing in this chunk is executed.
# weight_ID("7-May-20")
# cages7may1 <- bring_in_data("7May20/7_may_2020_1.csv", "M")
#
# weight_ID("7-May-20")
# cages7may2 <- bring_in_data("7May20/7_may_2020_2.csv", "M")
#
# weight_ID("7-May-20")
# cages7may3 <- bring_in_data("7May20/7_may_2020_3.csv", "M")
#
# weight_ID("7-May-20")
# cages7may4 <- bring_in_data("7May20/7_may_2020_4_wrongtemp.csv", "M")
# weight_ID("7-May-20")
# cages7may5 <- bring_in_data("7May20/7_may_2020_5.csv", "M")
#
# weight_ID("7-May-20")
# cages7may6 <- bring_in_data("7May20/7_may_2020_6.csv", "M")
#
# weight_ID("7-May-20")
# cages7may7 <- bring_in_data("7May20/7_may_2020_7_wrongtemp.csv", "M")
#
# weight_ID("7-May-20")
# cages7may8 <- bring_in_data("7May20/7_may_2020_8.csv", "M")
#
# weight_ID("7-May-20")
# cages7may9 <- bring_in_data("7May20/7_may_2020_9.csv", "M")
#
# weight_ID("7-May-20")
# cages7may10 <- bring_in_data("7May20/7_may_2020_10.csv", "M")
#
# weight_ID("7-May-20")
# cages7may11 <- bring_in_data("7May20/7_may_2020_11.csv", "M")
#
# cages7may_all <- rbind(cages7may1, cages7may2, cages7may3, cages7may4, cages7may5, cages7may6, cages7may7, cages7may8, cages7may9, cages7may10, cages7may11)
#
# cages7may <- merge_data(rbind(cages7may9, cages7may10, cages7may11))
#
# ggplot(cages7may_all, aes(as.POSIXct(with(cages7may_all, StartDate + hms(StartTime))),y=Deg_C))+
#   geom_line()
```

males 2 constant 90 deg temp
May 18, 2020
```{r}
# Four CSV files, each read after refreshing the IDs and weights.
weight_ID("18-May-20")
cages18may1 <- bring_in_data("18May20/18_may_20_1.csv", "M")

weight_ID("18-May-20")
cages18may2 <- bring_in_data("18May20/18_may_20_2.csv", "M")

weight_ID("18-May-20")
cages18may3 <- bring_in_data("18May20/18_may_20_3.csv", "M")

weight_ID("18-May-20")
cages18may4 <- bring_in_data("18May20/18_may_20_4.csv", "M")

# Everything that was recorded, unfiltered.
cages18may_all <- rbind(cages18may1, cages18may2, cages18may3, cages18may4)

# Analysis set: files 2-4 only (file 1 is left out), cut to the 72 h window.
cages18may <- merge_data(rbind(cages18may2, cages18may3, cages18may4))

ggplot(cages18may, aes(as.POSIXct(with(cages18may, StartDate + hms(StartTime))),y=Deg_C))+
  geom_line()
```

Males 1 constant 90 deg temp
May 22, 2020
NOT A REAL EXPERIMENT. 
DID NOT COLLECT ENOUGH DAYS OF DATA
```{r}
# Whole run disabled; nothing in this chunk is executed.
# NOTE(review): cages22may3 below reads "22_may_20_2.csv", the same file as
# cages22may2 - fix the file name if this chunk is ever re-enabled.
# weight_ID("22-May-20")
# cages22may1 <- bring_in_data("22May20/22_may_20_1.csv", "M")
#
# weight_ID("22-May-20")
# cages22may2 <- bring_in_data("22May20/22_may_20_2.csv", "M")
#
# weight_ID("22-May-20")
# cages22may3 <- bring_in_data("22May20/22_may_20_2.csv", "M")
#
# cages22may <- merge_data(rbind(cages22may1, cages22may2, cages22may3))
#
# ggplot(cages22may, aes(as.POSIXct(with(cages22may, StartDate + hms(StartTime))),y=Deg_C))+
#   geom_line()
```

Females 1 constant 90 deg temp
May 25, 2020
```{r}
# Four CSV files; weight_ID() is repeated with the same date before every
# read so bring_in_data() sees this run's IDs and weights.
weight_ID("25-May-20")
cages25may1 <- bring_in_data("25May20/25_may_20_1.csv", "F")

weight_ID("25-May-20")
cages25may2 <- bring_in_data("25May20/25_may_20_2.csv", "F")

weight_ID("25-May-20")
cages25may3 <- bring_in_data("25May20/25_may_20_3.csv", "F")

weight_ID("25-May-20")
cages25may4 <- bring_in_data("25May20/25_may_20_4.csv", "F")

# All four files are kept, then cut to the 72 h window.
cages25may <- merge_data(rbind(cages25may1, cages25may2, cages25may3, cages25may4))

# Chamber temperature over time, x axis ticked every two hours.
ggplot(cages25may, aes(as.POSIXct(with(cages25may, StartDate + hms(StartTime))),y=Deg_C))+
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")+
  geom_line()
```

Females 2 constant 90 deg temp
May 29, 2020
```{r}
weight_ID("29-May-20")
cages29may1 <- bring_in_data("29May20/29_may_20_1.csv", "F")

weight_ID("29-May-20")
cages29may2 <- bring_in_data("29May20/29_may_20_2.csv", "F")

weight_ID("29-May-20")
cages29may3 <- bring_in_data("29May20/29_may_20_3.csv", "F")

# All three files are kept, then cut to the 72 h window.
cages29may <- merge_data(rbind(cages29may1, cages29may2, cages29may3))

ggplot(cages29may, aes(as.POSIXct(with(cages29may, StartDate + hms(StartTime))),y=Deg_C))+
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")+
  geom_line()
```

Males redo constant 90 deg temp
June 2, 2020
```{r}
# NOTE(review): weights are looked up under 2-Jun-20 while the data folder
# and variables are named 3June / cages3june - confirm which date is right.
weight_ID("2-Jun-20")
cages3june1 <- bring_in_data("3June20/3_june_20_1.csv", "M")

weight_ID("2-Jun-20")
cages3june2 <- bring_in_data("3June20/3_june_20_2.csv", "M")

weight_ID("2-Jun-20")
cages3june3 <- bring_in_data("3June20/3_june_20_3.csv", "M")

weight_ID("2-Jun-20")
cages3june4 <- bring_in_data("3June20/3_june_20_4.csv", "M")

# All four files are kept, then cut to the 72 h window.
cages3june <- merge_data(rbind(cages3june1, cages3june2, cages3june3, cages3june4))

ggplot(cages3june, aes(as.POSIXct(with(cages3june, StartDate + hms(StartTime))),y=Deg_C))+
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")+
  geom_line()
```

males2 constant 70 deg temp redo
June 7, 2020
```{r}
weight_ID("7-Jun-20")
cages7june1 <- bring_in_data("7June20/7_june_20_1.csv", "M")

weight_ID("7-Jun-20")
cages7june2 <- bring_in_data("7June20/7_june_20_2.csv", "M")

weight_ID("7-Jun-20")
cages7june3 <- bring_in_data("7June20/7_june_20_3.csv", "M")

weight_ID("7-Jun-20")
cages7june4 <- bring_in_data("7June20/7_june_20_4.csv", "M")

# Analysis set: files 2-4 only (file 1 is left out), cut to the 72 h window.
cages7june <- merge_data(rbind(cages7june2, cages7june3, cages7june4))

ggplot(data = cages7june, aes(x = as.POSIXct(with(cages7june, StartDate + hms(StartTime))),y=Deg_C))+
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")+
  geom_line()
```

new cage experiments can be added here. follow code from above to do this

"final" data sets
```{r}
# Stack the runs of each treatment and tag every row with the treatment name.
baseline <- rbind(cages26feb, cages10mar, cages20feb, cages14mar)
baseline$experiment <- "baseline"

# The constant 70 deg runs.
# NOTE(review): cages7june (males2 70 deg redo) is loaded above but is not
# included here - confirm whether that is intentional.
cold <- rbind(cages20april, cages27april, cages30april)
cold$experiment <- "cold"

# The constant 90 deg runs.
# NOTE(review): cages3june (males redo 90 deg) is loaded above but is not
# included here - confirm whether that is intentional.
hot <- rbind(cages18may, cages25may, cages29may)
hot$experiment <- "hot"
```

select your data set for all down stream code. this should be changed depending on what down stream analysis you are doing
```{r}
all_animals <- rbind(hot,cold, baseline)

#remove empty cage
# Only cages 0-6 are kept; bring_in_data() also returns cage 7, which this
# filter drops.
target <- c(0,1,2,3,4,5,6)
cages <- all_animals %>% filter(animal %in% target)
```

plot some data. bad plot but this will let you know if there were any issues with your code or data from above. 
```{r}
legend_title <- "Animal ID"

# Quick sanity plot: energy expenditure against chamber temperature, one
# colour per experiment.
ggplot(cages, aes(Deg_C, EE, color = experiment)) +
  geom_point()
  #scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")
  #scale_colour_viridis_d(legend_title, option = "C") +
  #ylim()
```

getting data clean and analysis ready
```{r}
# Columns carried into the analysis table.
analysis_columns <- c(
  "Sex", "EE", "H2Omg", "RQ", "StartTime", "Animal_ID", "VO2", "VCO2",
  "Deg_C", "weight", "experiment", "SD_VCO2", "SD_VO2", "StartDate",
  "SD_H2Omg"
)
analysis_data <- cages[, analysis_columns]

# split the time up by hour to make it an easier variable to work with:
# "HH:MM:SS" is cut at every ":" into the columns X1 (hours), X2 and X3.
analysis_data_edited <- data.frame(
  do.call(
    "rbind",
    strsplit(as.character(analysis_data$StartTime), ":", fixed = TRUE)
  )
)

# add it back to the subsetted data for analysis
analysis_data_inter <- cbind(analysis_data, analysis_data_edited)

final_columns <- c(
  "Sex", "EE", "H2Omg", "RQ", "Animal_ID", "Deg_C", "weight", "experiment",
  "X1", "StartTime", "SD_VCO2", "SD_VO2", "SD_H2Omg", "VO2", "VCO2",
  "StartDate"
)
analysis_data_final <- analysis_data_inter[, final_columns]

analysis_data_final$Sex <- as.factor(analysis_data_final$Sex)
# `hour` keeps the text in front of the first ":" and is therefore not
# numeric. NOTE(review): convert it before using it on a continuous scale.
analysis_data_final$hour <- analysis_data_final$X1
analysis_data_final$X1 <- NULL
analysis_data_final$Deg_C <- as.double(analysis_data_final$Deg_C)
analysis_data_final$Animal_ID <- as.numeric(analysis_data_final$Animal_ID)
analysis_data_final$experiment <- as.factor(analysis_data_final$experiment)

# The destination is passed by position: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works before and after.
write_csv(
  analysis_data_final,
  "/Users/danielleblumstein/Desktop/analysis_data_final.csv"
)
```

temp needs to have one unmerged data set from the experiment you want to look at. 
haven't found a better way to do this yet
```{r}
#' Scatter of one metric against time of day, coloured by experiment.
#'
#' Draws semi-transparent points plus one loess trend (99% band) per
#' experiment, with the x axis ticked every two hours.
#'
#' @param data Data frame with `StartTime`, `experiment` and the column
#'   named by `metric`.
#' @param metric Name of the column to put on the y axis (also the y label).
#' @param span Loess span handed to geom_smooth().
#' @return A ggplot object; callers add their own theme and extra layers.
plot_metric_by_time <- function(data, metric, span) {
  ggplot(
    data,
    aes(
      x = as.POSIXct(StartTime),
      # Read y from the plotted data itself, not from a detached copy.
      y = .data[[metric]],
      colour = factor(experiment)
    )
  ) +
    # alpha is a fixed setting, so it belongs outside aes(); inside aes() it
    # is mapped through a scale and the points are not drawn at 0.2.
    geom_point(alpha = 0.2, size = 1) +
    geom_smooth(method = "loess", span = span, level = 0.99) +
    labs(x = "", y = metric) +
    scale_color_manual(values = c("#050505", "#FF3338", "blue")) +
    scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")
}

metric0 <- "Deg_C"
target <- 7
#the "cages20april" is what you will need to change to look at whatever temperature you are interested in
cagetemp <- cages20april %>% filter(animal %in% target)
# Only the last 320 rows of cage 7 are plotted.
cagetemp <- tail(cagetemp, n = 320)
measurement_zero <- cagetemp %>% dplyr::select(all_of(metric0))

# Temperature trace of cage 7. Built for reference; it is not part of the
# stacked figure printed at the end of this chunk.
temp_baseline <- ggplot(
  cagetemp,
  aes(x = as.POSIXct(StartTime), y = .data[[metric0]])
) +
  geom_line(size = 2) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 12),
    legend.position = "none"
  ) +
  labs(x = "", y = metric0) +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")

# NOTE(review): the colour legend shows experiments, yet its title reads
# "Animal ID" - confirm the intended title.
legend_title <- "Animal ID"

metric1 <- "RQ"
measurement_one <- cages %>% dplyr::select(all_of(metric1))
RQ <- plot_metric_by_time(cages, metric1, span = 0.4) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 12)
  ) +
  labs(colour = legend_title) +
  geom_hline(yintercept = 0.8907387, color = "orange", size = 1) +
  ylim(0.5, 2)

metric2 <- "EE"
measurement_two <- cages %>% dplyr::select(all_of(metric2))
EE <- plot_metric_by_time(cages, metric2, span = 0.4) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 12),
    legend.position = "none"
  )

metric3 <- "H2Omg"
measurement_three <- cages %>% dplyr::select(all_of(metric3))
h2o <- plot_metric_by_time(cages, metric3, span = 0.1) +
  theme(
    axis.text.y = element_text(size = 12),
    legend.position = "none",
    axis.text.x = element_text(size = 12)
  )

# Not used by the plots; kept so this chunk leaves the same `df` behind for
# any later code that still reads it.
df <- as.data.frame(measurement_three[[metric3]])

# patchwork: stack the three panels vertically.
RQ/EE/h2o
```

small data exploration with graphs
```{r}
unique(analysis_data_final$hour)

p1 <- ggplot(analysis_data_final, aes(RQ, EE, color = Sex)) +
  geom_point()

#p2 <- ggplot(analysis_data_final, aes(H2Omg, EE, color = hour , shape=Sex)) +
  #geom_point() +
  #scale_color_viridis(direction = -1) +
  #scale_colour_gradient2(low = "blue", mid = "red", high = "blue", midpoint = 12)
  #theme_minimal()

p3 <- ggplot(analysis_data_final, aes(RQ, H2Omg, color = Sex)) +
  geom_point()

p4 <- ggplot(analysis_data_final, aes(RQ, Deg_C, color = Sex)) +
  geom_point()

p5 <- ggplot(analysis_data_final, aes(EE, Deg_C, color = Sex)) +
  geom_point()

p6 <- ggplot(analysis_data_final, aes(H2Omg, Deg_C, color = Sex)) +
  geom_point()

# Uses all_animals rather than analysis_data_final.
p7 <- ggplot(all_animals, aes(weight, EE, color = Sex)) +
  geom_point()
#plot weight line for males and for females with a regression (male and female mean)

# patchwork: stack the six panels vertically.
p1/p3/p4/p5/p6/p7
```

this is the code for subsetting means for each individual for nights and days. only weights so far no electrolytes yet..... 
can't really remember what this code does
```{r}
# Rows recorded from 00:00 up to (not including) 06:00.
night <- analysis_data_inter[
  hour(analysis_data_inter$StartTime) >= 0 &
    hour(analysis_data_inter$StartTime) < 6,
]

# Rows recorded from 13:00 up to (not including) 19:00.
day <- analysis_data_inter[
  hour(analysis_data_inter$StartTime) >= 13 &
    hour(analysis_data_inter$StartTime) < 19,
]

# Mean night-time weight per animal; only the weight column is kept, so the
# result has one row per Animal_ID but no ID column.
animalweight <- dplyr::select(
  summarise(group_by(night, Animal_ID), weight = mean(weight)),
  weight
)

# this is the code to change - when changing the thing you want to plot.
#meanVCO2 <- day %>% group_by(Animal_ID) %>% summarise(VCO2 = mean(VO2)) %>% dplyr::select(VCO2)
```