Description
Bayesian Methods for Change Points Analysis.
Description
Perform change points detection on univariate and multivariate time series according to the methods presented by Asael Fabian Martínez and Ramsés H. Mena (2014) <doi:10.1214/14-BA878> and Corradin, Danese and Ongaro (2022) <doi:10.1016/j.ijar.2021.12.019>. It also clusters different types of time dependent data with common change points, see "Model-based clustering of time-dependent observations with common structural changes" (Corradin,Danese,KhudaBukhsh and Ongaro, 2024) <doi:10.48550/arXiv.2410.09552> for details.
README.md
BayesChange
BayesChange
provides C++ functions to perform Bayesian change points analysis.
Installation
To install BayesChange
the package devtools
is needed.
install.packages("devtools")
Now BayesChange
can be installed through the GitHub repository of the package:
devtools::install_github("lucadanese/BayesChange")
Package contents
The package contains two main functions:
detect_cp
detect change points on univariate and multivariate time series.clust_cp
cluster time series or survival functions with common change points.
Additional methods and functions are included:
print()
andsummary()
give informations about the algorithm.posterior_estimate()
estimates the change points or the final partition of the data.plot()
provides a graphical representation of the results.sim_epi_data
generates an arbitrary number of simulated survival functions.
Detect change points
library(BayesChange)
## Univariate time series
data_vec <- as.numeric(c(rnorm(50,0,0.1), rnorm(50,1,0.25)))
out <- detect_cp(data = data_vec, n_iterations = 2500, n_burnin = 500,
params = list(q = 0.25, phi = 0.1, a = 1, b = 1, c = 0.1))
print(out)
summary(out)
posterior_estimate(out)
plot(out)
## Multivariate time series
data_mat <- matrix(NA, nrow = 3, ncol = 100)
data_mat[1,] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_mat[2,] <- as.numeric(c(rnorm(50,0,0.125), rnorm(50,1,0.225)))
data_mat[3,] <- as.numeric(c(rnorm(50,0,0.175), rnorm(50,1,0.280)))
out <- detect_cp(data = data_mat, n_iterations = 2500, n_burnin = 500,
params = list(q = 0.25, k_0 = 0.25, nu_0 = 4, phi_0 = diag(1,3,3), m_0 = rep(0,3),
par_theta_c = 2, par_theta_d = 0.2, prior_var_gamma = 0.1))
print(out)
summary(out)
posterior_estimate(out)
plot(out)
Cluster time dependent data with common change points
## Univariate time series
data_mat <- matrix(NA, nrow = 5, ncol = 100)
data_mat[1,] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_mat[2,] <- as.numeric(c(rnorm(50,0,0.125), rnorm(50,1,0.225)))
data_mat[3,] <- as.numeric(c(rnorm(50,0,0.175), rnorm(50,1,0.280)))
data_mat[4,] <- as.numeric(c(rnorm(25,0,0.135), rnorm(75,1,0.225)))
data_mat[5,] <- as.numeric(c(rnorm(25,0,0.155), rnorm(75,1,0.280)))
out <- clust_cp(data = data_mat, n_iterations = 5000, n_burnin = 1000,
params = list(L = 1, B = 1000, gamma = 0.5), kernel = "ts")
print(out)
summary(out)
posterior_estimate(out)
plot(out)
## Multivariate time series
data_array <- array(data = NA, dim = c(3,100,5))
data_array[1,,1] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[2,,1] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[3,,1] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[1,,2] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[2,,2] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[3,,2] <- as.numeric(c(rnorm(50,0,0.100), rnorm(50,1,0.250)))
data_array[1,,3] <- as.numeric(c(rnorm(50,0,0.175), rnorm(50,1,0.280)))
data_array[2,,3] <- as.numeric(c(rnorm(50,0,0.175), rnorm(50,1,0.280)))
data_array[3,,3] <- as.numeric(c(rnorm(50,0,0.175), rnorm(50,1,0.280)))
data_array[1,,4] <- as.numeric(c(rnorm(25,0,0.135), rnorm(75,1,0.225)))
data_array[2,,4] <- as.numeric(c(rnorm(25,0,0.135), rnorm(75,1,0.225)))
data_array[3,,4] <- as.numeric(c(rnorm(25,0,0.135), rnorm(75,1,0.225)))
data_array[1,,5] <- as.numeric(c(rnorm(25,0,0.155), rnorm(75,1,0.280)))
data_array[2,,5] <- as.numeric(c(rnorm(25,0,0.155), rnorm(75,1,0.280)))
data_array[3,,5] <- as.numeric(c(rnorm(25,0,0.155), rnorm(75,1,0.280)))
out <- clust_cp(data = data_array, n_iterations = 3000, n_burnin = 1000,
params = list(B = 1000, L = 1, gamma = 0.5, k_0 = 0.25,
nu_0 = 5, phi_0 = diag(0.1,3,3),
m_0 = rep(0,3)), kernel = "ts")
print(out)
summary(out)
posterior_estimate(out)
plot(out)
## Epidemiological data
data_mat <- matrix(NA, nrow = 5, ncol = 50)
betas <- list(c(rep(0.45, 25),rep(0.14,25)),
c(rep(0.55, 25),rep(0.11,25)),
c(rep(0.50, 25),rep(0.12,25)),
c(rep(0.52, 10),rep(0.15,40)),
c(rep(0.53, 10),rep(0.13,40)))
inf_times <- list()
for(i in 1:5){
inf_times[[i]] <- sim_epi_data(10000, 10, 50, betas[[i]], 1/8)
vec <- rep(0,50)
names(vec) <- as.character(1:50)
for(j in 1:50){
if(as.character(j) %in% names(table(floor(inf_times[[i]])))){
vec[j] = table(floor(inf_times[[i]]))[which(names(table(floor(inf_times[[i]]))) == j)]
}
}
data_mat[i,] <- vec
}
out <- clust_cp(data = data_mat, n_iterations = 3000, n_burnin = 1000,
params = list(M = 250, L = 1, B = 1000), kernel = "epi")
print(out)print(out)
summary(out)
posterior_estimate(out)
plot(out)