Changepoint analysis

--- tags: Rscripts --- # Changepoint analysis ### Author: Jocelyn P. Colella ### Example changepoint analysis for BL Females (energy expenditure, EE) - Next step: loop this through all datasets and all dependent variables RESULTS - 4 to 8 changepoints - 4 changepoints clearly delineate transitions - Max-likelihood: 6:50, 7:43, 18:55, 19:58 (time of day) - Bayesian: 5:18, 6:56, 18:55, 19:58 (time of day) ML plot, 4 changepoints: ![](https://i.imgur.com/tNauBmi.jpg) Bayesian plot, 4 changepoints: ![](https://i.imgur.com/koK6TIZ.jpg) ``` # Script by : JPC # GOAL: estimate number and locations of major transitions library(changepoint) library(mcp) library(EnvCpt) #Sort data by time BL_noOL_F_sort <- BL_noOL_F %>% arrange(time_in_S) # Estimate changepoints using the CROPS penalty method out=cpt.meanvar(BL_noOL_F_sort$EE,pen.value=c(20, 150), Q=4, penalty="CROPS",method="PELT") # How many segments/changepoints are optimal? plot(out,diagnostic=TRUE) #Where are the change points? cpts.full(out) #Rescale to changepoints from arbitrary changepoint units 0-1900 to TIME (s) changept_scale <- 1900 ### IS THERE A MORE ACCURATE WAY TO IDENTIFY THIS??? sec_in_data <- max(BL_noOL_F_sort$time_in_S) - min(BL_noOL_F_sort$time_in_S) rescaled_intercepts_s = c() rescaled_intercepts_hr_min = c() #For each of the 4 changepoints (change if using a different number of changepoints) rescale to hr:min for(i in 1:4){ this_intercept <- cpts.full(out)[4,][[i]] secs <- as.numeric((sec_in_data * this_intercept)/changept_scale) #convert to s rescaled_intercepts_s <- c(rescaled_intercepts, secs) #add to list of converted s hrmin <- as.character(seconds_to_period(secs)) hrmin_parts <- strsplit(hrmin, " ") hr <- hrmin_parts[[1]][1] hr_num <- as.numeric(gsub("([0-9]+).*$", "\\1", hr)) #extract the hour number min <- hrmin_parts[[1]][2] min_num <- as.numeric(gsub("([0-9]+).*$", "\\1", min)) #extract the minutes number this_hrmin <- paste0(hr_num,":", min_num) #combine hours:minutes to look like a time #print(this_hrmin) rescaled_intercepts_hr_min <- c(rescaled_intercepts_hr_min, this_hrmin) #add constructed labels to a list } rescaled_intercepts_s #20822.40 26438.40 64756.80 68385.60 71927.35 rescaled_intercepts_hr_min #"6:50" "7:43" "18:55" "19:58" # add a zero to the "6:5" change point so that it looks like a true TIME - this will only be necessary for trailing 0's rescaled_intercepts_hr_min[1] <- "6:50" #Basic plot that we're going to improve plot(out,ncpts=4) # JPC-improved PLOT with means and changepoints: locations <- c() locations <- c(floor(0), floor(as.numeric((changept_scale*21600)/sec_in_data)), #NOTE THIS FUNCTION IS REVERSED FROM THE ABOVE FUNCTION to BACK calculate arbitrary location for ticks along the xaxis, based on specific time intervals (lables) that we want to display floor(as.numeric((changept_scale * 43200)/sec_in_data)), floor(as.numeric((changept_scale * 64800)/sec_in_data)), floor(1900)) locations plot(out, ncpts = 4, type = "p", pch = 16, cex = 0.7, col = alpha("grey31", 0.5), cpt.width = 3, xaxt='n', xlab="Time (s)", ylab="EE", frame.plot = FALSE, xlim=c(0, 1900), ylim=c(0, 0.5)) # initially lots without bottom axis (that is added later) axis(1, at = locations, labels = c("0", "21600", "43200", "64800", "86400")) #labels in actual seconds, selected for cleanliness (divide evenly into 86400) abline(v = c(482, 612, 1499, 1583 ), col="red", lty=2, lwd =2) text(c(485, 900, 1500, 1900), c(rep(0.5, length(rescaled_intercepts_hr_min))), rescaled_intercepts_hr_min, pos=2) ``` ``` ######### # BAYESIAN CHANGEPOINT ESTIMATE fit_bcp = bcp(BL_noOL_F_sort$EE, d = 5) plot(fit_bcp) #convert change point locations to real time sec_in_data <- max(BL_noOL_F_sort$time_in_S) - min(BL_noOL_F_sort$time_in_S) B_changepoints= c(420, 550, 1500, 1583) #from plot B_rescaled_intercepts_s = c() B_rescaled_intercepts_hr_min = c() for(i in 1:4){ this_intercept <- B_changepoints[i] print(this_intercept) secs <- as.numeric((sec_in_data * this_intercept)/changept_scale) B_rescaled_intercepts_s <- c(B_rescaled_intercepts_s, secs) hrmin <- as.character(seconds_to_period(secs)) hrmin_parts <- strsplit(hrmin, " ") hr <- hrmin_parts[[1]][1] hr_num <- as.numeric(gsub("([0-9]+).*$", "\\1", hr)) min <- hrmin_parts[[1]][2] min_num <- as.numeric(gsub("([0-9]+).*$", "\\1", min)) this_hrmin <- paste0(hr_num,":", min_num) print(this_hrmin) B_rescaled_intercepts_hr_min <- c(B_rescaled_intercepts_hr_min, this_hrmin) } B_rescaled_intercepts_s #19083.69 24990.55 68156.05 71927.35 B_rescaled_intercepts_hr_min #"5:18", "6:56", "18:55", "19:58" # JPC-improved plot: legacyplot <- function(x, ...) { if ((is.matrix(x$data) && ncol(x$data)>2 ) && !(attr(x, "model") == "multivariate" || attr(x, "structure") == "series")) stop("Legacy bcp plot invalid for multivariate bcp object or graph bcp object.") posterior.prob <- x$posterior.prob posterior.prob[length(posterior.prob)] <- 0 op <- par(mfrow=c(2,1),col.lab="black",col.main="black") op2 <- par(mar=c(0,4,4,2),xaxt="n", cex.axis=0.75) #UPPER PLOT plot(1:nrow(x$data), x$data[,2], col=alpha("grey", 0.7), pch=20, cex = 1, cex.axis = 1, xlab="", ylab="Posterior Mean", main="Posterior Means and Probabilities of a Change", ...) lines(x$posterior.mean, lwd=2, col = "black") par(op2) op3 <- par(mar=c(5,4,0,2), xaxt="s", cex.axis=0.75) #LOWER PLOT plot(1:length(x$posterior.mean), posterior.prob, yaxt="n", xaxt='n', type="l", ylim=c(0,1),lwd = 2, xlab="Time (24-hour)", ylab="Posterior Probability", main="") #Bayesian change points abline(v = c(420, 550, 1500, 1583 ), col="red", lty=2, lwd =2) text(c(400, 800, 1500, 1900), c(rep(0.95, length(B_rescaled_intercepts_hr_min))), B_rescaled_intercepts_hr_min, pos=2, cex.axis = 1) axis(1, at = locations, labels = c("0", "6", "12", "18", "24"), cex.axis = 1) axis(2, yaxp=c(0, 0.8, 2), cex.axis = 1) par(op3) par(op) } legacyplot(fit_bcp)