26 Stationarity tests
26.1 Dickey–Fuller test
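The Dickey–Fuller test tests the null hypothesis that a unit root is present in an autoregressive (AR) model. The alternative hypothesis depends on which version of the test is used, but is usually “stationarity” or “trend-stationarity”. Let’s consider an AR(1) model, i.e. $$x_t = \phi x_{t-1} + u_t,$$ where $u_t$ is a white-noise error term. A unit root is present when $\phi = 1$. Subtracting $x_{t-1}$ from both sides gives $\Delta x_t = \delta x_{t-1} + u_t$ with $\delta = \phi - 1$, so the test evaluates $H_0: \delta = 0$ (unit root) against $H_1: \delta < 0$ (stationarity).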
26.2 Augmented Dickey–Fuller test
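The augmented Dickey–Fuller (ADF) test is a more general version of the Dickey–Fuller test for a general AR(p) model, i.e. $$\Delta x_t = \mu + \beta t + \delta x_{t-1} + \sum_{i=1}^{p-1} \gamma_i \Delta x_{t-i} + u_t,$$ where $u_t$ is a white-noise error term. As in the Dickey–Fuller test, the null hypothesis of a unit root is $H_0: \delta = 0$ and the alternative is $H_1: \delta < 0$.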
26.3 Kolmogorov-Smirnov test
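The Kolmogorov–Smirnov two-sample test (KS) can be used to test whether two samples come from the same distribution. Let’s define the empirical distribution function of a sample $X_1, \dots, X_n$ as $$F_n(x) = \frac{1}{n}\sum_{i=1}^{n} \mathbb{1}_{\{X_i \le x\}}.$$ Given two samples of sizes $n_1$ and $n_2$, the test statistic is $$KS_{n_1, n_2} = \sup_x \left|F_{n_1}(x) - F_{n_2}(x)\right|,$$ and the null hypothesis that the two samples share the same distribution is rejected at significance level $\alpha$ when $$KS_{n_1, n_2} > \sqrt{-\frac{1}{2}\ln\left(\frac{\alpha}{2}\right)} \sqrt{\frac{n_1 + n_2}{n_1 n_2}}.$$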
To apply the test in a time series setting, use a random index to split the original series into two sub-series. The KS test can then be applied to the two sub-series as usual.
26.3.1 Examples
Example 26.1 Let’s consider 500 simulated observations of the random variable $X \sim \mathcal{N}(0.4, 1)$.
KS-test on a stationary time series
# ================ Setups ================
set.seed(5) # random seed
ci <- 0.05 # significance level (alpha)
n <- 500 # number of simulations
x <- rnorm(n, 0.4, 1) # stationary series
# ========================================
# Random time for splitting. The index is drawn from 1..(n - 1) so that the
# second sub-series always holds at least one observation: with t_split == n
# the range (t_split + 1):n would count down and index past the end of x,
# which puts an NA into the second sub-series.
t_split <- sample(n - 1, 1)
# Split the time series
x1 <- x[seq_len(t_split)]
x2 <- x[(t_split + 1):n]
# Number of elements for each sub-series
n1 <- length(x1)
n2 <- length(x2)
# Grid of values on which the empirical cdfs are evaluated for plotting
x_min <- quantile(x, 0.015)
x_max <- quantile(x, 0.985)
grid <- seq(x_min, x_max, length.out = 200)
# Empirical cdfs
cdf_n1 <- ecdf(x1)
cdf_n2 <- ecdf(x2)
# KS-statistic. Both empirical cdfs are step functions that only jump at
# observed values, so the supremum of their absolute difference is attained
# at one of the pooled sample points; a truncated grid could miss it.
ks_stat <- max(abs(cdf_n1(x) - cdf_n2(x)))
# Rejection level with probability alpha
rejection_lev <- sqrt(-0.5 * log(ci / 2)) * sqrt((n1 + n2) / (n1 * n2))
# P-value (asymptotic approximation). It is the inverse of the rejection level
# formula, so that ks_stat > rejection_lev exactly when p.value < ci; it is
# capped at 1 because the approximation exceeds 1 for small statistics.
p.value <- min(1, 2 * exp(-2 * ks_stat^2 * n1 * n2 / (n1 + n2)))
KS-test plot
# Breaks of the y axis and their labels expressed as percentages
y_breaks <- seq(0, 1, by = 0.2)
y_labels <- paste0(format(y_breaks*100, digits = 2), "%")
# Grid point at which the two empirical cdfs are furthest apart
grid_max <- grid[which.max(abs(cdf_n1(grid) - cdf_n2(grid)))]
ggplot() +
  # Area enclosed between the two empirical cdfs
  geom_ribbon(
    aes(x = grid, ymax = cdf_n1(grid), ymin = cdf_n2(grid)),
    alpha = 0.5, fill = "green"
  ) +
  # Empirical cdf of the first (black) and second (red) sub-series
  geom_line(aes(x = grid, y = cdf_n1(grid))) +
  geom_line(aes(x = grid, y = cdf_n2(grid)), color = "red") +
  # Vertical segment and end points marking the largest distance on the grid
  geom_segment(
    aes(
      x = grid_max, xend = grid_max,
      y = cdf_n1(grid_max), yend = cdf_n2(grid_max)
    ),
    linetype = "solid", color = "magenta"
  ) +
  geom_point(aes(x = grid_max, y = cdf_n1(grid_max)), color = "magenta") +
  geom_point(aes(x = grid_max, y = cdf_n2(grid_max)), color = "magenta") +
  scale_y_continuous(breaks = y_breaks, labels = y_labels) +
  labs(x = "x", y = "cdf") +
  theme_bw()
KS-test (stationary)
# One-row summary of the test; H0 is marked as rejected when the statistic
# exceeds the critical level.
kab <- tibble(
  t_split = t_split, ci = ci,
  n1 = n1, n2 = n2,
  KS = ks_stat, p.value = p.value,
  rejection_lev = rejection_lev,
  H0 = ifelse(KS > rejection_lev, "Rejected", "Non-Rejected")
)
# Convert every numeric column to text with digits = 4 and no scientific
# notation
kab <- mutate_if(kab, is.numeric, format, digits = 4, scientific = FALSE)
# Column headers shown in the rendered table
kab <- setNames(
  kab,
  c(
    "$\\textbf{Index split}$", "$\\alpha$", "$n_1$", "$n_2$",
    "$KS_{n_1, n_2}$", "p.value", "$\\textbf{Critical level}$", "$H_0$"
  )
)
knitr::kable(kab, escape = FALSE)
Example 26.2 Let’s consider 500 simulated observations, where the first 250 are drawn from $X_1 \sim \mathcal{N}(0, 1)$ and the last 250 from $X_2 \sim \mathcal{N}(0.3, 1)$.
KS-test on a non-stationary time series
# ============== Setups ==============
set.seed(2) # random seed
ci <- 0.05 # significance level (alpha)
n <- 500 # number of simulations
# Simulated non-stationary sample: the mean shifts from 0 to 0.3 halfway
x1 <- rnorm(n / 2, 0, 1)
x2 <- rnorm(n / 2, 0.3, 1)
x <- c(x1, x2)
# ====================================
# Random split of the time series. The index is drawn from 1..(n - 1) so that
# the second sub-series always holds at least one observation: with
# t_split == n the range (t_split + 1):n would count down and index past the
# end of x, which puts an NA into the second sub-series.
t_split <- sample(n - 1, 1)
x1 <- x[seq_len(t_split)]
x2 <- x[(t_split + 1):n]
# Number of elements for each sub sample
n1 <- length(x1)
n2 <- length(x2)
# Grid of values on which the empirical cdfs are evaluated for plotting
grid <- seq(quantile(x, 0.015), quantile(x, 0.985), 0.01)
# Empirical cdfs
cdf_1 <- ecdf(x1)
cdf_2 <- ecdf(x2)
# KS-statistic. Both empirical cdfs are step functions that only jump at
# observed values, so the supremum of their absolute difference is attained
# at one of the pooled sample points; a truncated grid could miss it.
ks_stat <- max(abs(cdf_1(x) - cdf_2(x)))
# Rejection level
rejection_lev <- sqrt(-0.5 * log(ci / 2)) * sqrt((n1 + n2) / (n1 * n2))
# P-value (asymptotic approximation). It is the inverse of the rejection level
# formula, so that ks_stat > rejection_lev exactly when p.value < ci; it is
# capped at 1 because the approximation exceeds 1 for small statistics.
p.value <- min(1, 2 * exp(-2 * ks_stat^2 * n1 * n2 / (n1 + n2)))
KS-test plot
# Breaks of the y axis and their labels expressed as percentages
y_breaks <- seq(0, 1, by = 0.2)
y_labels <- paste0(format(y_breaks*100, digits = 2), "%")
# Grid point at which the two empirical cdfs are furthest apart
grid_max <- grid[which.max(abs(cdf_1(grid) - cdf_2(grid)))]
ggplot() +
  # Area enclosed between the two empirical cdfs
  geom_ribbon(
    aes(x = grid, ymax = cdf_1(grid), ymin = cdf_2(grid)),
    alpha = 0.5, fill = "green"
  ) +
  # Empirical cdf of the first (black) and second (red) sub-series
  geom_line(aes(x = grid, y = cdf_1(grid))) +
  geom_line(aes(x = grid, y = cdf_2(grid)), color = "red") +
  # Vertical segment and end points marking the largest distance on the grid
  geom_segment(
    aes(
      x = grid_max, xend = grid_max,
      y = cdf_1(grid_max), yend = cdf_2(grid_max)
    ),
    linetype = "solid", color = "magenta"
  ) +
  geom_point(aes(x = grid_max, y = cdf_1(grid_max)), color = "magenta") +
  geom_point(aes(x = grid_max, y = cdf_2(grid_max)), color = "magenta") +
  scale_y_continuous(breaks = y_breaks, labels = y_labels) +
  labs(x = "x", y = "cdf") +
  theme_bw()
KS-test (non-stationary)
# One-row summary of the test; H0 is marked as rejected when the statistic
# exceeds the critical level.
kab <- dplyr::tibble(
  t_split = t_split, ci = ci,
  n1 = n1, n2 = n2,
  KS = ks_stat, p.value = p.value,
  rejection_lev = rejection_lev,
  H0 = ifelse(KS > rejection_lev, "Rejected", "Non-Rejected")
)
# Convert every numeric column to text with digits = 4 and no scientific
# notation
kab <- mutate_if(kab, is.numeric, format, digits = 4, scientific = FALSE)
# Column headers shown in the rendered table
kab <- setNames(
  kab,
  c(
    "$\\textbf{Index split}$", "$\\alpha$", "$n_1$", "$n_2$",
    "$KS_{n_1, n_2}$", "p.value", "$\\textbf{Critical level}$", "$H_0$"
  )
)
knitr::kable(kab, escape = FALSE)