PIT transforms

n = 10000
x = rnorm(n)
F1 = function(x) pnorm(x, sd = 0.75)  # Forecaster #1, underdispersed
F2 = function(x) pnorm(x, sd = 2)     # Forecaster #2, overdispersed

x0 = seq(-3, 3, length = 100)
cols = c(1, 2, 4)

# Plot the densities
matplot(x0, cbind(dnorm(x0), dnorm(x0, sd = 0.75), dnorm(x0, sd = 2)),
        col = cols, type = "l", lty = 1, xlab = "x", ylab = "Density")
legend("topleft", col = cols, lty = 1, legend = c("True density", "f1", "f2"))

# Plot the CDFs
matplot(x0, cbind(pnorm(x0), F1(x0), F2(x0)), col = cols, 
        type = "l", lty = 1, xlab = "x", ylab = "Cumulative probability")
legend("topleft", col = cols, lty = 1, legend = c("True CDF", "F1", "F2"))

# Plot the PIT values
hist(F1(x), breaks = 50, col = paste0(palette()[cols[2]], "44"), border = NA, 
     prob = TRUE, xlab = "PIT value", main = "")
hist(F2(x), breaks = 50, col = paste0(palette()[cols[3]], "44"), border = NA, 
     prob = TRUE, add = TRUE)
legend("topleft", col = cols[2:3], lty = 1, legend = c("F1(x)", "F2(x)"))

Ensembles and PIT calibration

n = 10000
x1 = rnorm(n)
x2 = rnorm(n)

# The target variable has two random components: x1 and x2
x = x1 + x2

# Forecaster #1 knows the x1 component, and returns N(x1, 1) as its forecast
F1 = function(x) pnorm(x, mean = x1)

# Forecaster #2 knows the x2 component, and returns N(x2, 1) as its forecast
F2 = function(x) pnorm(x, mean = x2)

# Each one is PIT calibrated!
hist(F1(x), breaks = 50, col = paste0(palette()[2], "44"), border = NA, 
     prob = TRUE, xlab = "PIT value", main = "")
hist(F2(x), breaks = 50, col = paste0(palette()[4], "44"), border = NA, 
     prob = TRUE, add = TRUE)
legend("topleft", col = c(2, 4), lty = 1, legend = c("F1(x)", "F2(x)"))

# But what about the simple average ensemble? It is overdispersed!
F = function(x) (F1(x) + F2(x)) / 2
hist(F(x), breaks = 50, col = "lightgray", border = NA, prob = TRUE, 
     xlab = "PIT value", main = "")