import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import cvxpy as cp
import mosekSpectrum Model¶
We are working with the model: for (here ), where denote the DFT of the observed data . The parameters underlying this model are . The model can be written in terms of the periodogram as
where . The mean of the periodogram is called the power of frequency . The relation between and is given by
If we treat as a function on (say by joining successive values by lines), we obtain the power spectral density function.
Application One: Sunspots Dataset¶
sunspots = pd.read_csv('SN_y_tot_V2.0.csv', header = None, sep = ';')
y = sunspots.iloc[:,1].values
n = len(y)
plt.figure(figsize = (12, 6))
plt.plot(y)
plt.show()
def periodogram(y):
fft_y = np.fft.fft(y)
n = len(y)
fourier_freqs = np.arange(1/n, 1/2, 1/n)
m = len(fourier_freqs)
pgram_y = (np.abs(fft_y[1:m+1]) ** 2)/n
return fourier_freqs, pgram_yfreqs, pgram = periodogram(y)
plt.figure(figsize = (12, 6))
plt.plot(freqs, pgram)
plt.title('Periodogram')
plt.show()
freqs, pgram = periodogram(y)
plt.figure(figsize = (12, 6))
plt.plot(freqs, np.log(pgram))
plt.title('Log Periodogram')
plt.show()
Note that the periodogram above is quite rough and wiggly (noisy). The spectrum model can be used to smooth it. With proper regularization, we will get estimates of the mean of the periodogram which smooth the trend in the periodogram without being noisy.
To estimate in the spectrum model, we use the following estimators (below )
or
def spectrum_estimator_ridge(y, lambda_val):
freq, I = periodogram(y)
m = len(freq)
n = len(y)
alpha = cp.Variable(m)
neg_likelihood_term = cp.sum(cp.multiply((n * I / 2), cp.exp(-2 * alpha)) + 2*alpha)
smoothness_penalty = cp.sum(cp.square(alpha[2:] - 2 * alpha[1:-1] + alpha[:-2]))
objective = cp.Minimize(neg_likelihood_term + lambda_val * smoothness_penalty)
problem = cp.Problem(objective)
problem.solve(solver = cp.MOSEK)
return alpha.value, freq
def spectrum_estimator_lasso(y, lambda_val):
freq, I = periodogram(y)
m = len(freq)
n = len(y)
alpha = cp.Variable(m)
neg_likelihood_term = cp.sum(cp.multiply((n * I / 2), cp.exp(-2 * alpha)) + 2*alpha)
smoothness_penalty = cp.sum(cp.abs(alpha[2:] - 2 * alpha[1:-1] + alpha[:-2]))
objective = cp.Minimize(neg_likelihood_term + lambda_val * smoothness_penalty)
problem = cp.Problem(objective)
problem.solve(solver = cp.MOSEK)
return alpha.value, freqThe estimate of can be converted to the power by
alpha_opt_ridge, freq = spectrum_estimator_ridge(y, 200)
power_ridge = (2/n)*(np.exp(2*alpha_opt_ridge))
alpha_opt_lasso, freq = spectrum_estimator_lasso(y, 10)
power_lasso = (2/n)*(np.exp(2*alpha_opt_lasso))
plt.figure(figsize = (12, 6))
#markerline, stemline, baseline = plt.stem(freq, pgram, linefmt = 'lightblue', basefmt = '')
#markerline.set_marker("None")
plt.plot(freq, pgram)
plt.title('Periodogram and the Power Spectrum')
plt.plot(freq, power_ridge, color = 'red', label = 'Ridge')
plt.plot(freq, power_lasso, color = 'black', label = "LASSO")
plt.legend()
plt.show()
#Mosek sometimes gives some format conversion warnings which you can ignore
plt.figure(figsize = (12, 6))
#markerline, stemline, baseline = plt.stem(freq, np.log(pgram), linefmt = 'lightblue', basefmt = '')
#markerline.set_marker("None")
plt.plot(freq, np.log(pgram), color = 'lightblue')
plt.title('Log Periodogram and Log Power Spectrum')
plt.plot(freq, np.log(power_ridge), color = 'red', label = 'Ridge')
plt.plot(freq, np.log(power_lasso), color = 'black', label = "LASSO")
plt.legend()
plt.show()

There is a whole band of frequencies which contribute towards the mode in the power spectrum.
Application Two: Southern Oscillation Index Dataset¶
This dataset can be downloaded from http://
Wikipedia article for El Niño Southern Oscillation has this sentence: “El Niño and Li Niña last a year or so and typically occur every two to seven years with varying intensity, with neutral periods of lower intensity interspersed.”
def load_soi_data(filepath):
data = []
with open(filepath, 'r') as f:
for line in f:
line = line.strip()
if not line:
continue # skip blank lines if any
date_str, soi_str = line.split(',')
date_code = int(date_str) # e.g. 187601
year = date_code // 100 # e.g. 1876
month = date_code % 100 # e.g. 1
soi_val = float(soi_str)
data.append([year, month, soi_val])
# Create a DataFrame from parsed rows
df = pd.DataFrame(data, columns=['Year', 'Month', 'SOI'])
# Convert Year, Month to a datetime; assume day=1 for each month
df['Date'] = pd.to_datetime(df[['Year', 'Month']].assign(DAY=1))
# Make Date the index
df.set_index('Date', inplace=True)
# Sort by date just in case (and drop the now-redundant Year/Month columns)
df.sort_index(inplace=True)
return df[['SOI']]
# Usage
df_soi = load_soi_data("soi_monthly.txt")
print(df_soi) SOI
Date
1876-01-01 11.3
1876-02-01 11.0
1876-03-01 0.2
1876-04-01 9.4
1876-05-01 6.8
... ...
2024-10-01 4.2
2024-11-01 6.5
2024-12-01 10.8
2025-01-01 3.7
2025-02-01 3.0
[1790 rows x 1 columns]
y = df_soi['SOI']
n = len(y)
plt.figure(figsize = (12, 6))
plt.plot(y)
plt.show()
freqs, pgram = periodogram(y)
plt.figure(figsize = (12, 6))
plt.plot(freqs, pgram)
plt.title('Periodogram')
plt.show()
freqs, pgram = periodogram(y)
plt.figure(figsize = (12, 6))
plt.plot(freqs, np.log(pgram))
plt.title('Log Periodogram')
plt.show()
alpha_opt_ridge, freq = spectrum_estimator_ridge(y, 50000)
power_ridge = (2/n)*(np.exp(2*alpha_opt_ridge))
alpha_opt_lasso, freq = spectrum_estimator_lasso(y, 100)
power_lasso = (2/n)*(np.exp(2*alpha_opt_lasso))
plt.figure(figsize = (12, 6))
markerline, stemline, baseline = plt.stem(freq, pgram, linefmt = 'lightblue', basefmt = '')
markerline.set_marker("None")
plt.title('Periodogram and the Power Spectrum')
plt.plot(freq, power_ridge, color = 'red', label = 'Ridge')
plt.plot(freq, power_lasso, color = 'black', label = "LASSO")
plt.legend()
plt.show()

plt.figure(figsize = (12, 6))
#markerline, stemline, baseline = plt.stem(freq, np.log(pgram), linefmt = 'lightblue', basefmt = '')
#markerline.set_marker("None")
plt.plot(freq, np.log(pgram), color = 'lightblue')
plt.title('Log Periodogram and Log Power Spectrum')
plt.plot(freq, np.log(power_ridge), color = 'red', label = 'Ridge')
plt.plot(freq, np.log(power_lasso), color = 'black', label = "LASSO")
plt.legend()
plt.show()

from scipy.signal import find_peaks
# Find peaks
peaks, _ = find_peaks(np.log(power_ridge))
print("Peaks:", peaks)
print(1/freq[peaks])Peaks: [ 33 186 257 318 423 560 631 710 782 857]
[52.64705882 9.57219251 6.9379845 5.61128527 4.22169811 3.19073084
2.83227848 2.51758087 2.28607918 2.08624709]
The peak therefore corresponds to the period of about 52.64 months, which is about 4.5 years.
Application Three: Quake Vibration Dataset¶
This dataset is from the Mathworks (MATLAB) tutorial “Practical Introduction to Frequency-Domain Analysis” (see this link). From this matlab tutorial: Active Mass Driver (AMD) control systems are used to reduce vibration in a building under an earthquake. An active mass driver is placed on the top floor of the building and, based on displacement and acceleration measurements of the building floors, a control system sends signals to the driver so that the mass moves to attenuate ground disturbances. Acceleration measurements were recorded on the first floor of a three story test structure under earthquake conditions. Measurements were taken without the active mass driver control system (open loop condition), and with the active control system (closed loop condition).
from scipy.io import loadmat
mat_dict = loadmat('quakevibration.mat')
print(mat_dict.keys())dict_keys(['__header__', '__version__', '__globals__', 'gfloor1OL', 'gfloor1CL'])
gfloor1OL_array = mat_dict['gfloor1OL']
gfloor1CL_array = mat_dict['gfloor1CL']
print(gfloor1OL_array.shape)
print(gfloor1CL_array.shape)(10000, 1)
(10000, 1)
y_ol = gfloor1OL_array.ravel()
y_cl = gfloor1CL_array.ravel()fig, axes = plt.subplots(nrows = 2, ncols = 1, figsize = (12, 6))
axes[0].plot(y_ol)
axes[0].set_title('y_ol')
axes[1].plot(y_cl)
axes[1].set_title('y_cl')
plt.tight_layout()
plt.show()
n = len(y_ol)
freqs, pgram_ol = periodogram(y_ol)
alpha_opt_ridge_ol, freq = spectrum_estimator_ridge(y_ol, 100000)
pgram_mean_ridge_ol = (2/n)*(np.exp(2*alpha_opt_ridge_ol))
alpha_opt_lasso_ol, freq = spectrum_estimator_lasso(y_ol, 1000)
pgram_mean_lasso_ol = (2/n)*(np.exp(2*alpha_opt_lasso_ol))plt.figure(figsize = (12, 6))
plt.plot(freqs, (pgram_ol), color = 'lightblue', label = 'Periodogram')
#plt.plot(freqs, np.log(pgram_ol), color = 'None', label = 'Periodogram')
plt.title('Periodogram (OL)')
plt.plot(freqs, (pgram_mean_ridge_ol), color = 'red', label = 'Ridge')
plt.plot(freqs, (pgram_mean_lasso_ol), color = 'black', label = 'LASSO')
plt.legend()
plt.show()
plt.figure(figsize = (12, 6))
plt.plot(freqs, np.log(pgram_ol), color = 'lightblue', label = 'Periodogram')
#plt.plot(freqs, np.log(pgram_ol), color = 'None', label = 'Periodogram')
plt.title('Periodogram (OL)')
plt.plot(freqs, np.log(pgram_mean_ridge_ol), color = 'red', label = 'Ridge')
plt.plot(freqs, np.log(pgram_mean_lasso_ol), color = 'black', label = 'LASSO')
plt.legend()
plt.show()
n = len(y_cl)
freqs, pgram_cl = periodogram(y_cl)
alpha_opt_ridge_cl, freq = spectrum_estimator_ridge(y_cl, 100000)
pgram_mean_ridge_cl = (2/n)*(np.exp(2*alpha_opt_ridge_cl))
alpha_opt_lasso_cl, freq = spectrum_estimator_lasso(y_cl, 1000)
pgram_mean_lasso_cl = (2/n)*(np.exp(2*alpha_opt_lasso_cl))plt.figure(figsize = (12, 6))
plt.plot(freqs, (pgram_cl), color = 'lightblue', label = 'Periodogram')
#plt.plot(freqs, np.log(pgram_cl), color = 'None', label = 'Periodogram')
plt.title('Periodogram (CL)')
plt.plot(freqs, (pgram_mean_ridge_cl), color = 'red', label = 'Ridge')
plt.plot(freqs, (pgram_mean_lasso_cl), color = 'black', label = 'LASSO')
plt.legend()
plt.show()
plt.figure(figsize = (12, 6))
plt.plot(freqs, np.log(pgram_cl), color = 'lightblue', label = 'Periodogram')
#plt.plot(freqs, np.log(pgram_cl), color = 'None', label = 'Periodogram')
plt.title('Periodogram (CL)')
plt.plot(freqs, np.log(pgram_mean_ridge_cl), color = 'red', label = 'Ridge')
plt.plot(freqs, np.log(pgram_mean_lasso_cl), color = 'black', label = 'LASSO')
plt.legend()
plt.show()
plt.figure(figsize = (12, 6))
plt.plot(freqs, np.log(pgram_mean_lasso_ol), color = 'black', label = 'OL')
plt.plot(freqs, np.log(pgram_mean_lasso_cl), color = 'red', label = 'CL')
plt.legend()
plt.title('Log Power Spectral Density')
plt.show()
There are three loops on the open loop spectrum and two on the closed loop spectrum. The control system reduces the overall power of the vibrations and gets rid of one harmonic component. In other words, the control system not only reduces the vibration but also brings it closer to a sinusoid.
The main point is that the differences between the two time series is much better summarized using their power spectra, compared to the raw data.
Application Four: FRED dataset¶
The following datset is FRED’s “Industrial Production: Total Index” dataset. It is a monthly dataset that is not seasonally adjusted.
prod_index = pd.read_csv("IPB50001N-06March2025FRED.csv")
print(prod_index.head(10))
pind = prod_index['IPB50001N']
plt.figure(figsize = (12, 6))
plt.plot(pind)
plt.show() observation_date IPB50001N
0 1919-01-01 4.7841
1 1919-02-01 4.5959
2 1919-03-01 4.4884
3 1919-04-01 4.5691
4 1919-05-01 4.7035
5 1919-06-01 4.9991
6 1919-07-01 5.1604
7 1919-08-01 5.2679
8 1919-09-01 5.2947
9 1919-10-01 5.2679

The data has an increasing trend so it does not make sense to use the spectrum model directly here. Instead, let us work with the annual growth rates:
pind = prod_index['IPB50001N']
pind = pind.to_numpy()
y = 100*(np.log(pind[12:]) - np.log(pind[:-12]))
plt.plot(y)
n = len(y)
print(n) #now n is odd
plt.show()1261

freqs, pgram = periodogram(y)
plt.figure(figsize = (12, 6))
plt.plot(freqs, pgram)
plt.title('Periodogram')
plt.show()
freqs, pgram = periodogram(y)
plt.plot(freqs, np.log(pgram))
plt.title('Log Periodogram')
plt.show()
alpha_opt_ridge, freq = spectrum_estimator_ridge(y, 5000)
power_ridge = (2/n)*(np.exp(2*alpha_opt_ridge))
alpha_opt_lasso, freq = spectrum_estimator_lasso(y, 100)
power_lasso = (2/n)*(np.exp(2*alpha_opt_lasso))
markerline, stemline, baseline = plt.stem(freq, pgram, linefmt = 'lightblue', basefmt = '')
markerline.set_marker("None")
plt.title('Periodogram and the Power Spectrum')
plt.plot(freq, power_ridge, color = 'red', label = 'Ridge')
plt.plot(freq, power_lasso, color = 'black', label = "LASSO")
plt.legend()
plt.show()

plt.plot(freq, np.log(pgram), color = 'lightblue')
plt.title('Log Periodogram and Log Power Spectrum')
plt.plot(freq, np.log(power_ridge), color = 'red', label = 'Ridge')
plt.plot(freq, np.log(power_lasso), color = 'black', label = "LASSO")
plt.legend()
plt.show()

from scipy.signal import find_peaks
# Find peaks
peaks, _ = find_peaks(np.log(power_ridge))
print("Peaks:", peaks)
print(freq[peaks])
print(1/freq[peaks])Peaks: [ 27 140 248 266 358 458 484 577]
[0.0222046 0.11181602 0.19746233 0.21173672 0.28469469 0.36399683
0.38461538 0.45836638]
[45.03571429 8.94326241 5.06425703 4.72284644 3.51253482 2.74727669
2.6 2.1816609 ]
Economists use this as evidence for existence of a business cycle with period around 45 months (which is close to 4 years). See Section 6.4 for the Hamilton book on time series for more on this example.