Load packages
library(tidyverse)
library(astsa)
library(fpp3)
library(epidatasets)
Lagged regression
y = as.numeric(cmort) # Response vector
lags = 4 * 0:10 # Lags 0, 4, 8, 12, ... 40
# Build predictor matrix, and run linear regression
x = matrix(NA, length(y), length(lags))
for (j in 1:length(lags)) {
x[,j] = dplyr::lag(as.numeric(part), lags[j])
}
reg = lm(y ~ x)
coef(reg)
## (Intercept) x1 x2 x3 x4 x5
## 54.840747976 0.218774708 0.146177521 0.146738025 0.105053522 0.038851794
## x6 x7 x8 x9 x10 x11
## 0.004758640 0.046766034 0.043647238 0.005335550 -0.034166678 -0.006548196
Ridge
library(glmnet) # Implements ridge and lasso estimators
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-8
# We'll need to omit NA values from x (and y, correspondingly) explicitly, as
# otherwise glmnet will complain
na_obs = 1:max(lags)
x = x[-na_obs, ]
y = y[-na_obs]
# Ridge regression: set alpha = 0, lambda sequence will be chosen automatically
ridge = glmnet(x, y, alpha = 0)
beta_ridge = coef(ridge)
lambda_ridge = ridge$lambda
dim(beta_ridge) # One row per coefficient, one column per lambda value
## [1] 12 100
beta_ridge[1:3, 1:5] # E.g., first 3 coefficients for first 5 lambda values
## 3 x 5 sparse Matrix of class "dgCMatrix"
## s0 s1 s2 s3 s4
## (Intercept) 8.849359e+01 8.845060e+01 8.844641e+01 8.844182e+01 8.843678e+01
## V1 3.052009e-37 6.119754e-04 6.712496e-04 7.362242e-04 8.074384e-04
## V2 3.614721e-37 7.241482e-04 7.942171e-04 8.710100e-04 9.551601e-04
matplot(log(lambda_ridge), t(beta_ridge[-1, ]), type = "l", col = 1:8,
xlab = "log(lambda)", ylab = "Coefficient estimate", main = "Ridge")
legend("topright", legend = paste("Lag", lags), lty = 1:5, col = 1:8)
Lasso
# Lasso regression: set alpha = 1, and let glmnet() choose a lambda sequence
# itself, automatically
lasso = glmnet(x, y, alpha = 1)
beta_lasso = coef(lasso)
lambda_lasso = lasso$lambda
dim(beta_lasso) # One row per coefficient, one column per lambda value
## [1] 12 66
beta_lasso[1:3, 1:5] # E.g., first 3 coefficients for first 5 lambda values
## 3 x 5 sparse Matrix of class "dgCMatrix"
## s0 s1 s2 s3 s4
## (Intercept) 88.49359 86.64912678 84.88663639 83.28028242 81.81704580
## V1 . . . . .
## V2 . 0.01618761 0.03490427 0.05197942 0.06751764
matplot(lambda_lasso, t(beta_lasso[-1, ]), type = "l", col = 1:8,
xlab = "lambda", ylab = "Coefficient estimate", main = "Lasso")
legend("topright", legend = paste("Lag", lags), lty = 1:5, col = 1:8)
Moving average
m = 10
wgts = rep(1, m) / m
ma = stats::filter(soi, sides = 2, filter = wgts)
plot(soi, col = 8, ylab = "Southern Oscillation Index",
main = "Moving average")
lines(ma, lwd = 2, col = 4)
par(fig = c(0.55, 1, 0.55, 1), new = TRUE) # The top-right insert
nwgts = c(rep(0, 20), wgts, rep(0, 20))
plot(nwgts, type = "l", ylim = c(-0.02, 0.1),
xaxt = "n", yaxt = "n", ann = FALSE)
Kernel smooth
ks = ksmooth(time(soi), soi, kernel = "normal", bandwidth = 1)
plot(soi, col = 8, ylab = "Southern Oscillation Index",
main = "Kernel smoother")
lines(ks, lwd = 2, col = 4)
par(fig = c(0.55, 1, 0.55, 1), new = TRUE) # The top-right insert
curve(dnorm(x), type = "l", xlim = c(-3, 3), ylim = c(-0.02, 0.45),
xaxt = "n", yaxt = "n", ann = FALSE)
HP filter
boston = boston_marathon |>
filter(Year >= 1924) |>
filter(Event == "Men's open division") |>
mutate(Minutes = as.numeric(Time)/60) |>
select(Year, Minutes)
n = nrow(boston)
D = diag(rep(-2,n)) # -2s on the diagonal
D[row(D) == col(D)-1] = 1 # 1s above the diagonal
D[row(D) == col(D)+1] = 1 # 1s below the diagonal
D = D[-c(1,n), ] # Drop first and last row
D[1:6, 1:6] # Take a quick look
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 -2 1 0 0 0
## [2,] 0 1 -2 1 0 0
## [3,] 0 0 1 -2 1 0
## [4,] 0 0 0 1 -2 1
## [5,] 0 0 0 0 1 -2
## [6,] 0 0 0 0 0 1
I = diag(n) # n x n identity matrix
# Compute and plot HP filter solution at a few lambda values
lam = c(10, 100, 1000)
hp = matrix(NA, nrow = n, ncol = length(lam))
for (i in 1:length(lam)) {
hp[ ,i] = solve(I + lam[i] * t(D) %*% D, boston$Minutes)
}
plot(boston$Year, boston$Minutes, col = 8,
xlab = "Year", ylab = "Time", main = "Hodrick-Prescott filter")
matplot(boston$Year, hp, type = "l", lty = 1, lwd = 2, col = 2:4, add = TRUE)
legend("topright", paste("lambda =", lam), lty = 1, lwd = 2, col = 2:4)
Trend filter
library(glmgen) # Implements trend filtering
# Compute and plot trend filter solution at a few lambda values: the lambda
# sequence will be chosen automatically (as in glmnet)
tf = trendfilter(boston$Minutes, k = 1)
ind = c(20, 10, 1)
plot(boston$Year, boston$Minutes, col = 8,
xlab = "Year", ylab = "Time", main = "Trend filter")
matplot(boston$Year, tf$beta[, ind], type = "l",
lty = 1, lwd = 2, col = 2:4, add = TRUE)
# knots = which(abs(D %*% tf$beta[, ind[2]]) > 1e-5) + 1
# abline(v = boston$Year[knots], lty = 2, col = 3)
legend("topright", paste("lambda =", round(tf$lambda[ind])),
lty = 1, lwd = 2, col = 2:4)