---
tags: Rscripts
---
# Changepoint analysis
### Author: Jocelyn P. Colella
### Example changepoint analysis for BL Females (energy expenditure, EE)
- Next step: loop this through all datasets and all dependent variables
RESULTS
- 4 to 8 changepoints
- 4 changepoints clearly delineate transitions
- Max-likelihood: 6:50, 7:43, 18:55, 19:58 (time of day)
- Bayesian: 5:18, 6:56, 18:55, 19:58 (time of day)
ML plot, 4 changepoints:

Bayesian plot, 4 changepoints:

```
# Script by : JPC
# GOAL: estimate number and locations of major transitions
library(changepoint)
library(mcp)
library(EnvCpt)
#Sort data by time
BL_noOL_F_sort <- BL_noOL_F %>% arrange(time_in_S)
# Estimate changepoints using the CROPS penalty method
out=cpt.meanvar(BL_noOL_F_sort$EE,pen.value=c(20, 150), Q=4, penalty="CROPS",method="PELT")
# How many segments/changepoints are optimal?
plot(out,diagnostic=TRUE)
#Where are the change points?
cpts.full(out)
#Rescale to changepoints from arbitrary changepoint units 0-1900 to TIME (s)
changept_scale <- 1900 ### IS THERE A MORE ACCURATE WAY TO IDENTIFY THIS???
sec_in_data <- max(BL_noOL_F_sort$time_in_S) - min(BL_noOL_F_sort$time_in_S)
rescaled_intercepts_s = c()
rescaled_intercepts_hr_min = c()
#For each of the 4 changepoints (change if using a different number of changepoints) rescale to hr:min
for(i in 1:4){
this_intercept <- cpts.full(out)[4,][[i]]
secs <- as.numeric((sec_in_data * this_intercept)/changept_scale) #convert to s
rescaled_intercepts_s <- c(rescaled_intercepts, secs) #add to list of converted s
hrmin <- as.character(seconds_to_period(secs))
hrmin_parts <- strsplit(hrmin, " ")
hr <- hrmin_parts[[1]][1]
hr_num <- as.numeric(gsub("([0-9]+).*$", "\\1", hr)) #extract the hour number
min <- hrmin_parts[[1]][2]
min_num <- as.numeric(gsub("([0-9]+).*$", "\\1", min)) #extract the minutes number
this_hrmin <- paste0(hr_num,":", min_num) #combine hours:minutes to look like a time
#print(this_hrmin)
rescaled_intercepts_hr_min <- c(rescaled_intercepts_hr_min, this_hrmin) #add constructed labels to a list
}
rescaled_intercepts_s
#20822.40 26438.40 64756.80 68385.60 71927.35
rescaled_intercepts_hr_min
#"6:50" "7:43" "18:55" "19:58"
# add a zero to the "6:5" change point so that it looks like a true TIME - this will only be necessary for trailing 0's
rescaled_intercepts_hr_min[1] <- "6:50"
#Basic plot that we're going to improve
plot(out,ncpts=4)
# JPC-improved PLOT with means and changepoints:
locations <- c()
locations <- c(floor(0),
floor(as.numeric((changept_scale*21600)/sec_in_data)), #NOTE THIS FUNCTION IS REVERSED FROM THE ABOVE FUNCTION to BACK calculate arbitrary location for ticks along the xaxis, based on specific time intervals (lables) that we want to display
floor(as.numeric((changept_scale * 43200)/sec_in_data)),
floor(as.numeric((changept_scale * 64800)/sec_in_data)),
floor(1900))
locations
plot(out, ncpts = 4, type = "p", pch = 16, cex = 0.7,
col = alpha("grey31", 0.5), cpt.width = 3, xaxt='n',
xlab="Time (s)", ylab="EE",
frame.plot = FALSE, xlim=c(0, 1900), ylim=c(0, 0.5)) # initially lots without bottom axis (that is added later)
axis(1, at = locations, labels = c("0", "21600", "43200", "64800", "86400")) #labels in actual seconds, selected for cleanliness (divide evenly into 86400)
abline(v = c(482, 612, 1499, 1583 ), col="red", lty=2, lwd =2)
text(c(485, 900, 1500, 1900), c(rep(0.5, length(rescaled_intercepts_hr_min))), rescaled_intercepts_hr_min, pos=2)
```
```
#########
# BAYESIAN CHANGEPOINT ESTIMATE
fit_bcp = bcp(BL_noOL_F_sort$EE, d = 5)
plot(fit_bcp)
#convert change point locations to real time
sec_in_data <- max(BL_noOL_F_sort$time_in_S) - min(BL_noOL_F_sort$time_in_S)
B_changepoints= c(420, 550, 1500, 1583) #from plot
B_rescaled_intercepts_s = c()
B_rescaled_intercepts_hr_min = c()
for(i in 1:4){
this_intercept <- B_changepoints[i]
print(this_intercept)
secs <- as.numeric((sec_in_data * this_intercept)/changept_scale)
B_rescaled_intercepts_s <- c(B_rescaled_intercepts_s, secs)
hrmin <- as.character(seconds_to_period(secs))
hrmin_parts <- strsplit(hrmin, " ")
hr <- hrmin_parts[[1]][1]
hr_num <- as.numeric(gsub("([0-9]+).*$", "\\1", hr))
min <- hrmin_parts[[1]][2]
min_num <- as.numeric(gsub("([0-9]+).*$", "\\1", min))
this_hrmin <- paste0(hr_num,":", min_num)
print(this_hrmin)
B_rescaled_intercepts_hr_min <- c(B_rescaled_intercepts_hr_min, this_hrmin)
}
B_rescaled_intercepts_s
#19083.69 24990.55 68156.05 71927.35
B_rescaled_intercepts_hr_min
#"5:18", "6:56", "18:55", "19:58"
# JPC-improved plot:
legacyplot <- function(x, ...) {
if ((is.matrix(x$data) && ncol(x$data)>2 ) &&
!(attr(x, "model") == "multivariate" || attr(x, "structure") == "series"))
stop("Legacy bcp plot invalid for multivariate bcp object or graph bcp object.")
posterior.prob <- x$posterior.prob
posterior.prob[length(posterior.prob)] <- 0
op <- par(mfrow=c(2,1),col.lab="black",col.main="black")
op2 <- par(mar=c(0,4,4,2),xaxt="n", cex.axis=0.75)
#UPPER PLOT
plot(1:nrow(x$data), x$data[,2], col=alpha("grey", 0.7),
pch=20, cex = 1, cex.axis = 1, xlab="", ylab="Posterior Mean",
main="Posterior Means and Probabilities of a Change", ...)
lines(x$posterior.mean, lwd=2, col = "black")
par(op2)
op3 <- par(mar=c(5,4,0,2), xaxt="s", cex.axis=0.75)
#LOWER PLOT
plot(1:length(x$posterior.mean), posterior.prob,
yaxt="n", xaxt='n', type="l", ylim=c(0,1),lwd = 2,
xlab="Time (24-hour)", ylab="Posterior Probability", main="")
#Bayesian change points
abline(v = c(420, 550, 1500, 1583 ), col="red", lty=2, lwd =2)
text(c(400, 800, 1500, 1900),
c(rep(0.95, length(B_rescaled_intercepts_hr_min))), B_rescaled_intercepts_hr_min,
pos=2, cex.axis = 1)
axis(1, at = locations, labels = c("0", "6", "12", "18", "24"), cex.axis = 1)
axis(2, yaxp=c(0, 0.8, 2), cex.axis = 1)
par(op3)
par(op)
}
legacyplot(fit_bcp)