@@ -215,18 +215,6 @@ attr(lnr_homoskedastic_density, 'sl_lnr_type') <- 'density'
215215#' Conditional Density Estimation with Heteroskedasticity
216216#'
217217#'
218- #' TODO: The following code has a bug / statistical issue.
219- #' =======================================================
220- #'
221- #' I think there are bugs with this because performing a basic
222- #' test that if we fix the conditioning set (X) and integrate, integrating
223- #' a conditional probability density with X fixed should yield 1.
224- #'
225- #' In numerical tests, when the variance is scaled for, integrating conditional
226- #' densities seems to yield integration values exceeding 1 (sometimes by a lot).
227- #' I am pretty sure this poses a problem for optimizing negative log likelihood loss.
228- #'
229- #' Said numerical tests are displayed in the `Density-Estimation` article.
230218#' @export
231219#' @returns a closure (function) that produces density estimates
232220#' at the \code{newdata} given according to the fit model.
@@ -237,10 +225,10 @@ attr(lnr_homoskedastic_density, 'sl_lnr_type') <- 'density'
237225#' around the predicted conditional mean in the output.
238226#' @param var_lnr_args Extra arguments to be passed to the \code{var_lnr}
239227lnr_heteroskedastic_density <- function(data, formula,
240- mean_lnr, var_lnr,
241- mean_lnr_args = NULL,
242- var_lnr_args = NULL,
243- density_args = NULL) {
228+ mean_lnr, var_lnr,
229+ mean_lnr_args = NULL,
230+ var_lnr_args = NULL,
231+ density_args = NULL) {
244232
245233 # fit the mean_lnr
246234 mean_predictor <- do.call(
@@ -257,6 +245,11 @@ lnr_heteroskedastic_density <- function(data, formula,
257245
258246 # calculate squared errors from the conditional mean predictor model
259247 errors_squared <- errors^2
248+
249+ # calculate a practical floor for error variance, called squared tolerance
250+ errors_sd <- sd(errors)
251+ tol2 <- (0.1 * errors_sd)^2
252+
260253 data$.errors_squared <- errors_squared
261254 var_formula <- as.formula(
262255 paste0(".errors_squared ~ ", as.character(formula)[[3]]))
@@ -279,8 +272,17 @@ lnr_heteroskedastic_density <- function(data, formula,
279272 errors <- newdata[[y_variable]] - mean_predictions
280273 var_predictions <- var_predictor(newdata)
281274 var_predictions[var_predictions < 0] <- min_obs_error_squared # should this be .Machine$double.eps ?
282- sd_predictions <- sqrt(var_predictions)
283- predicted_densities <- approx(density_model$x, density_model$y, errors / sd_predictions, rule = 2)$y / sd_predictions
275+
276+ # replace any NA or too-small var_pred with tol2
277+ var_preds_clean <- ifelse(
278+ is.na(var_predictions) | var_predictions < 0,
279+ tol2,
280+ var_predictions
281+ )
282+ sd_preds <- sqrt(var_preds_clean)
283+ errors <- errors / sd_preds
284+
285+ predicted_densities <- approx(density_model$x, density_model$y, errors, rule = 2)$y
284286 return(predicted_densities)
285287 }
286288 return(predictor)
0 commit comments