|
| 1 | +# ============================================================================== |
| 2 | +# Blume–Capel Numerical Stability Study (clean, documented) |
| 3 | +# File: dev/numerical_analyses/BCvar_normalization_PL.r |
| 4 | +# |
| 5 | +# Goal |
| 6 | +# ---- |
| 7 | +# Compare numerical stability of four ways to compute the Blume–Capel |
| 8 | +# normalizing constant across a range of residual scores r: |
| 9 | +# |
| 10 | +# Methods (exactly four): |
| 11 | +# 1) Direct — unbounded sum of exp(θ_lin*s + θ_quad*s^2 + s*r) |
| 12 | +# 2) Preexp — unbounded “power-chain” (precompute exp(θ-part), reuse exp(r)) |
| 13 | +# 3) Direct + bound — SPLIT bound: b_theta + b_r, computing sum(exp(term - b_theta - b_r)) |
| 14 | +# 4) Preexp + bound — SPLIT bound with power-chain on r-part only |
| 15 | +# |
| 16 | +# Split bound (the only bound we use): |
| 17 | +# - b_theta = max_s (θ_lin*s + θ_quad*s^2) # depends only on model, constant in r |
| 18 | +# - b_r = C* * r, where C* = argmax_s |s| # depends only on r and the score range |
| 19 | +# |
| 20 | +# References (for error calculation): |
| 21 | +# - ref_unscaled = MPFR sum(exp(term)) |
| 22 | +# - ref_split_scaled = MPFR sum(exp( (θ_part - b_theta) + (s*r - b_r) )) |
| 23 | +# |
| 24 | +# Dependencies |
| 25 | +# ------------ |
| 26 | +# - Rmpfr |
| 27 | +# |
| 28 | +# Outputs |
| 29 | +# ------- |
| 30 | +# - compare_bc_all_methods(...) returns a data.frame with: |
| 31 | +# r : grid of residual scores |
| 32 | +# direct : numeric, Σ exp(term) |
| 33 | +# preexp : numeric, Σ via power-chain (unbounded) |
| 34 | +# direct_bound : numeric, Σ exp((θ-bθ) + (s*r - b_r)) |
| 35 | +# preexp_bound : numeric, Σ via power-chain with split bound |
| 36 | +# err_direct : |(direct - ref_unscaled)/ref_unscaled| |
| 37 | +# err_preexp : |(preexp - ref_unscaled)/ref_unscaled| |
| 38 | +# err_direct_bound : |(direct_bound - ref_split_scaled)/ref_split_scaled| |
| 39 | +# err_preexp_bound : |(preexp_bound - ref_split_scaled)/ref_split_scaled| |
| 40 | +# ref_unscaled : numeric MPFR reference (unbounded) |
| 41 | +# ref_split_scaled : numeric MPFR reference (split-scaled) |
| 42 | +# |
| 43 | +# Plotting helpers: |
| 44 | +# - plot_bc_four(res, ...) |
| 45 | +# - summarize_bc_four(res) |
| 46 | +# |
| 47 | +# ============================================================================== |
| 48 | +library(Rmpfr) |
| 49 | + |
| 50 | +# ------------------------------------------------------------------------------ |
| 51 | +# compare_bc_all_methods |
| 52 | +# ------------------------------------------------------------------------------ |
| 53 | +# Compute all four methods and MPFR references over a vector of r-values. |
| 54 | +# |
| 55 | +# Args: |
| 56 | +# C : integer, max category (scores are s = 0..C) |
| 57 | +# ref : integer, baseline category index (scores centered by s <- 0:C - ref) |
| 58 | +# r_vals : numeric vector of r values to scan |
| 59 | +# theta_lin : numeric, linear θ parameter |
| 60 | +# theta_quad : numeric, quadratic θ parameter |
| 61 | +# mpfr_prec : integer, MPFR precision (bits) for reference calculations |
| 62 | +# |
| 63 | +# Returns: |
| 64 | +# data.frame with columns described in the file header (see “Outputs”). |
| 65 | +# |
| 66 | +compare_bc_all_methods <- function(C = 10, |
| 67 | + ref = 3, |
| 68 | + r_vals = seq(-70, 70, length.out = 2000), |
| 69 | + theta_lin = 0.12, |
| 70 | + theta_quad = -0.02, |
| 71 | + mpfr_prec = 256) { |
| 72 | + |
| 73 | + # --- score grid and θ-part --------------------------------------------------- |
| 74 | + s_vals <- 0:C - ref |
| 75 | + smin <- min(s_vals) |
| 76 | + |
| 77 | + # θ_part(s) = θ_lin*s + θ_quad*s^2 |
| 78 | + theta_part <- theta_lin * s_vals + theta_quad * s_vals^2 |
| 79 | + |
| 80 | + # --- split bound components (fixed model part, r-dependent rest part) ------- |
| 81 | + b_theta <- max(theta_part) # constant over r |
| 82 | + Cstar <- s_vals[ which.max(abs(s_vals)) ] # farthest-from-zero score |
| 83 | + |
| 84 | + # Precomputed exponentials for the two chains we use |
| 85 | + exp_m <- exp(theta_part) # for unbounded preexp chain |
| 86 | + exp_m_theta_scaled <- exp(theta_part - b_theta) # for split-bounded variants |
| 87 | + |
| 88 | + # Output container |
| 89 | + res <- data.frame( |
| 90 | + r = r_vals, |
| 91 | + direct = NA_real_, |
| 92 | + preexp = NA_real_, |
| 93 | + direct_bound = NA_real_, # split bound |
| 94 | + preexp_bound = NA_real_, # split bound |
| 95 | + err_direct = NA_real_, |
| 96 | + err_preexp = NA_real_, |
| 97 | + err_direct_bound = NA_real_, |
| 98 | + err_preexp_bound = NA_real_, |
| 99 | + ref_unscaled = NA_real_, |
| 100 | + ref_split_scaled = NA_real_ |
| 101 | + ) |
| 102 | + |
| 103 | + # --- MPFR constants independent of r ---------------------------------------- |
| 104 | + tl <- mpfr(theta_lin, mpfr_prec) |
| 105 | + tq <- mpfr(theta_quad, mpfr_prec) |
| 106 | + s_mp <- mpfr(s_vals, mpfr_prec) |
| 107 | + bth_mp<- mpfr(b_theta, mpfr_prec) |
| 108 | + |
| 109 | + # --- Main loop over r -------------------------------------------------------- |
| 110 | + for (i in seq_along(r_vals)) { |
| 111 | + r <- r_vals[i] |
| 112 | + term <- theta_part + s_vals * r # double-precision exponents |
| 113 | + |
| 114 | + # r-dependent split bound |
| 115 | + b_r <- Cstar * r |
| 116 | + |
| 117 | + # ---------- MPFR references ---------- |
| 118 | + r_mp <- mpfr(r, mpfr_prec) |
| 119 | + br_mp <- mpfr(b_r, mpfr_prec) |
| 120 | + |
| 121 | + term_mp <- tl*s_mp + tq*s_mp*s_mp + s_mp*r_mp |
| 122 | + ref_unscaled_mp <- sum(exp(term_mp)) # Σ exp(term) |
| 123 | + ref_split_scaled_mp <- sum(exp((tl*s_mp + tq*s_mp*s_mp - bth_mp) + |
| 124 | + (s_mp*r_mp - br_mp))) # Σ exp((θ-bθ)+(s*r-b_r)) |
| 125 | + |
| 126 | + # Store numeric references |
| 127 | + res$ref_unscaled[i] <- asNumeric(ref_unscaled_mp) |
| 128 | + res$ref_split_scaled[i] <- asNumeric(ref_split_scaled_mp) |
| 129 | + |
| 130 | + # ---------- (1) Direct (unbounded) ---------- |
| 131 | + v_direct <- sum(exp(term)) |
| 132 | + res$direct[i] <- v_direct |
| 133 | + |
| 134 | + # ---------- (2) Preexp (unbounded) ---------- |
| 135 | + # Power-chain on exp(r): start at s = smin |
| 136 | + eR <- exp(r) |
| 137 | + pow <- exp(smin * r) |
| 138 | + S_pre <- 0.0 |
| 139 | + for (j in seq_along(s_vals)) { |
| 140 | + S_pre <- S_pre + exp_m[j] * pow |
| 141 | + pow <- pow * eR |
| 142 | + } |
| 143 | + res$preexp[i] <- S_pre |
| 144 | + |
| 145 | + # ---------- (3) Direct + bound (SPLIT) ---------- |
| 146 | + # Σ exp( (θ - b_theta) + (s r - b_r) ), explicit loop for clarity |
| 147 | + sum_val <- 0.0 |
| 148 | + for (j in seq_along(s_vals)) { |
| 149 | + sum_val <- sum_val + exp( (theta_part[j] - b_theta) + (s_vals[j]*r - b_r) ) |
| 150 | + } |
| 151 | + res$direct_bound[i] <- sum_val |
| 152 | + |
| 153 | + # ---------- (4) Preexp + bound (SPLIT) ---------- |
| 154 | + # Fold ONLY -b_r into the r-chain; θ-part is pre-scaled outside. |
| 155 | + pow_b <- exp(smin * r - b_r) |
| 156 | + S_pre_b <- 0.0 |
| 157 | + for (j in seq_along(s_vals)) { |
| 158 | + S_pre_b <- S_pre_b + exp_m_theta_scaled[j] * pow_b |
| 159 | + pow_b <- pow_b * eR |
| 160 | + } |
| 161 | + res$preexp_bound[i] <- S_pre_b |
| 162 | + |
| 163 | + # ---------- Errors (vs MPFR) ---------- |
| 164 | + res$err_direct[i] <- asNumeric(abs((mpfr(v_direct, mpfr_prec) - ref_unscaled_mp) / ref_unscaled_mp)) |
| 165 | + res$err_preexp[i] <- asNumeric(abs((mpfr(S_pre, mpfr_prec) - ref_unscaled_mp) / ref_unscaled_mp)) |
| 166 | + res$err_direct_bound[i] <- asNumeric(abs((mpfr(res$direct_bound[i], mpfr_prec) - ref_split_scaled_mp) / ref_split_scaled_mp)) |
| 167 | + res$err_preexp_bound[i] <- asNumeric(abs((mpfr(res$preexp_bound[i], mpfr_prec) - ref_split_scaled_mp) / ref_split_scaled_mp)) |
| 168 | + } |
| 169 | + |
| 170 | + res |
| 171 | +} |
| 172 | + |
| 173 | +# ------------------------------------------------------------------------------ |
| 174 | +# plot_bc_four |
| 175 | +# ------------------------------------------------------------------------------ |
| 176 | +# Plot the four relative error curves on a log y-axis. |
| 177 | +# |
| 178 | +# Args: |
| 179 | +# res : data.frame produced by compare_bc_all_methods() |
| 180 | +# draw_order : character vector with any ordering of: |
| 181 | +# c("err_direct","err_direct_bound","err_preexp_bound","err_preexp") |
| 182 | +# alpha : named numeric vector (0..1) alphas for the same names |
| 183 | +# lwd : line width |
| 184 | +# |
| 185 | +# Returns: (invisible) NULL. Draws a plot. |
| 186 | +# |
| 187 | +plot_bc_four <- function(res, |
| 188 | + draw_order = c("err_direct","err_direct_bound","err_preexp_bound","err_preexp"), |
| 189 | + alpha = c(err_direct=0.00, err_direct_bound=0.00, err_preexp_bound=0.40, err_preexp=0.40), |
| 190 | + lwd = 2) { |
| 191 | + base_cols <- c(err_direct="#000000", err_preexp="#D62728", |
| 192 | + err_direct_bound="#1F77B4", err_preexp_bound="#9467BD") |
| 193 | + to_rgba <- function(hex, a) rgb(t(col2rgb(hex))/255, alpha=a) |
| 194 | + cols <- mapply(to_rgba, base_cols[draw_order], alpha[draw_order], SIMPLIFY=TRUE, USE.NAMES=TRUE) |
| 195 | + |
| 196 | + vals <- unlist(res[draw_order]); vals <- vals[is.finite(vals)] |
| 197 | + ylim <- if (length(vals)) { |
| 198 | + q <- stats::quantile(vals, c(.01,.99), na.rm=TRUE); c(q[1]/10, q[2]*10) |
| 199 | + } else c(1e-20,1e-12) |
| 200 | + |
| 201 | + first <- draw_order[1] |
| 202 | + plot(res$r, res[[first]], type="l", log="y", col=cols[[1]], lwd=lwd, ylim=ylim, |
| 203 | + xlab="r", ylab="Relative error (vs MPFR)", |
| 204 | + main="Blume–Capel: Direct / Preexp / (Split) Bound") |
| 205 | + if (length(draw_order) > 1) { |
| 206 | + for (k in 2:length(draw_order)) lines(res$r, res[[draw_order[k]]], col=cols[[k]], lwd=lwd) |
| 207 | + } |
| 208 | + abline(h=.Machine$double.eps, col="gray70", lty=2) |
| 209 | + legend("top", |
| 210 | + legend=c("Direct","Direct + bound (split)","Preexp + bound (split)","Preexp") |
| 211 | + [match(draw_order, c("err_direct","err_direct_bound","err_preexp_bound","err_preexp"))], |
| 212 | + col=cols, lwd=lwd, bty="n") |
| 213 | + invisible(NULL) |
| 214 | +} |
| 215 | + |
| 216 | +# ------------------------------------------------------------------------------ |
| 217 | +# summarize_bc_four |
| 218 | +# ------------------------------------------------------------------------------ |
| 219 | +# Summarize accuracy per method. |
| 220 | +# |
| 221 | +# Args: |
| 222 | +# res : data.frame from compare_bc_all_methods() |
| 223 | +# |
| 224 | +# Returns: |
| 225 | +# data.frame with columns: Method, Mean, Median, Max, Finite |
| 226 | +# |
| 227 | +summarize_bc_four <- function(res) { |
| 228 | + cols <- c("err_direct","err_direct_bound","err_preexp_bound","err_preexp") |
| 229 | + labs <- c("Direct","Direct+Bound(split)","Preexp+Bound(split)","Preexp") |
| 230 | + mk <- function(v){ |
| 231 | + f <- is.finite(v) & v > 0 |
| 232 | + c(Mean=mean(v[f]), Median=median(v[f]), Max=max(v[f]), Finite=mean(f)) |
| 233 | + } |
| 234 | + out <- t(sapply(cols, function(nm) mk(res[[nm]]))) |
| 235 | + data.frame(Method=labs, out, row.names=NULL, check.names=FALSE) |
| 236 | +} |
| 237 | + |
| 238 | +# ============================================================================== |
| 239 | +# Example usage (uncomment to run locally) |
| 240 | +# ------------------------------------------------------------------------------ |
| 241 | +# res <- compare_bc_all_methods( |
| 242 | +# C = 10, ref = 0, |
| 243 | +# r_vals = seq(-80, -70, length.out = 2000), |
| 244 | +# theta_lin = 0.12, |
| 245 | +# theta_quad = 0.50, |
| 246 | +# mpfr_prec = 256 |
| 247 | +# ) |
| 248 | +# plot_bc_four(res, |
| 249 | +# draw_order = c("err_direct","err_direct_bound","err_preexp_bound","err_preexp"), |
| 250 | +# alpha = c(err_direct=0.00, err_direct_bound=0.50, err_preexp_bound=0.0, err_preexp=0.00), |
| 251 | +# lwd = 1) |
| 252 | +# abline(v = 70); abline(v = -70) |
| 253 | +# print(summarize_bc_four(res), digits = 3) |
| 254 | +# ============================================================================== |
| 255 | + |
| 256 | +# ============================================================================== |
| 257 | +# Results Observations (curated runs) |
| 258 | +# ------------------------------------------------------------------------------ |
| 259 | +# The blocks below capture observed behavior for representative settings. |
| 260 | +# They are comments only—kept in-file so future readers see context quickly. |
| 261 | +# |
| 262 | +# ## ref = 5, C = 10 |
| 263 | +# res <- compare_bc_all_methods(C = 10, ref = 5, |
| 264 | +# r_vals = seq(-70, 70, length.out = 2000), |
| 265 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 266 | +# - Direct bounded a bit less variable |
| 267 | +# - Direct unbounded is significantly worse than the rest |
| 268 | +# Conclusion: Don’t use direct unbounded. |
| 269 | +# |
| 270 | +# res <- compare_bc_all_methods(C = 10, ref = 5, |
| 271 | +# r_vals = seq(-100, -65, length.out = 2000), |
| 272 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 273 | +# - All methods continue to work decently even until r < -100. |
| 274 | +# - Direct unbounded is significantly worse than the rest |
| 275 | +# Conclusion: Don’t use direct unbounded. |
| 276 | +# |
| 277 | +# res <- compare_bc_all_methods(C = 10, ref = 5, |
| 278 | +# r_vals = seq(65, 80, length.out = 2000), |
| 279 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 280 | +# - The bounded variants work until r < 71 |
| 281 | +# - The unbounded variants continue to work decently even until r > 100. |
| 282 | +# - Direct unbounded is significantly worse than the rest |
| 283 | +# Conclusion: Use Preexp bounded for r < 71; Use Preexp unbounded for r > 71. |
| 284 | +# |
| 285 | +# ## ref = 0, C = 10 |
| 286 | +# res <- compare_bc_all_methods(C = 10, ref = 0, |
| 287 | +# r_vals = seq(-70, 70, length.out = 2000), |
| 288 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 289 | +# - Direct bounded is less variable; Direct is worst overall |
| 290 | +# Conclusion: Don’t use direct unbounded. |
| 291 | +# |
| 292 | +# res <- compare_bc_all_methods(C = 10, ref = 0, |
| 293 | +# r_vals = seq(-80, -70, length.out = 2000), |
| 294 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 295 | +# - The two unbounded variants fail when r < 0. |
| 296 | +# - Preexp bounded works until r < 71 |
| 297 | +# - Direct bounded until r < -76. |
| 298 | +# Conclusion: Use Direct bounded for r in [-75, -70]; Use Preexp bounded for r > -71. |
| 299 | +# |
| 300 | +# res <- compare_bc_all_methods(C = 10, ref = 0, |
| 301 | +# r_vals = seq(65, 80, length.out = 2000), |
| 302 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 303 | +# - The two unbounded variants fail when r > 65 |
| 304 | +# - Direct bounded remains stable throughout. |
| 305 | +# - Preexp bounded blows up after r > 71 |
| 306 | +# Conclusion: Use Direct bounded for r > 71; Use Preexp bounded for r < 71. |
| 307 | +# |
| 308 | +# ## ref = 10, C = 10 |
| 309 | +# res <- compare_bc_all_methods(C = 10, ref = 10, |
| 310 | +# r_vals = seq(-70, 70, length.out = 2000), |
| 311 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 312 | +# - The unbounded variants become worse after r > 40 |
| 313 | +# - Direct bounded is less variable |
| 314 | +# Conclusion: Don’t use unbounded. |
| 315 | +# |
| 316 | +# res <- compare_bc_all_methods(C = 10, ref = 10, |
| 317 | +# r_vals = seq(-80, -65, length.out = 2000), |
| 318 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 319 | +# - The two unbounded variants fail when r < 66. |
| 320 | +# - The two bounded variants continue to work well even until r < -100. |
| 321 | +# Conclusion: Don’t use unbounded. |
| 322 | +# |
| 323 | +# res <- compare_bc_all_methods(C = 10, ref = 10, |
| 324 | +# r_vals = seq(65, 80, length.out = 2000), |
| 325 | +# theta_lin = 0.12, theta_quad = 0.50, mpfr_prec = 256) |
| 326 | +# - The two unbounded variants fail when r > 75; direct unbounded already past r > 0. |
| 327 | +# - Direct bounded remains stable until r < 76 |
| 328 | +# - Preexp bounded blows up after r > 71 |
| 329 | +# Conclusion: Use Direct bounded for r > 71; Use Preexp bounded for r < 71. |
| 330 | +# |
| 331 | +# ### Overall conclusions |
| 332 | +# - Direct unbounded is never recommended. |
| 333 | +# - For ref near center (e.g., ref = 5 when C = 10): |
| 334 | +# • Use Preexp bounded for moderate |r| (≈ |r| < 70) |
| 335 | +# • Use Preexp unbounded for large positive r (r > 71) |
| 336 | +# • Use Direct bounded for large negative r (r < -70) |
| 337 | +# - For ref near edge (e.g., ref = 0 or ref = 10): |
| 338 | +# • Use Direct bounded for large |r| (|r| > 70) |
| 339 | +# • Use Preexp bounded for moderate |r| |
| 340 | +# ============================================================================== |
| 341 | + |
0 commit comments