3030# ' during prediction will *not* be imputed.
3131# ' - If set to `"always"`, an unseen level is added to the feature during training and missing values are imputed as
3232# ' that value during prediction.
33- # ' - Finally, if set to `"param"`, the hyperparameter `create_empty_levels ` is added and control over this behavior is
33+ # ' - Finally, if set to `"param"`, the hyperparameter `create_empty_level` is added and control over this behavior is
3434# ' left to the user.
3535# '
3636# ' For implementation details, see Internals below. Default is `"never"`.
@@ -160,6 +160,7 @@ PipeOpImpute = R6Class("PipeOpImpute",
160160 private $ .create_empty_level = FALSE
161161 emplvls_control_ps = ps()
162162 } else if (empty_level_control == " param" ) {
163+ private $ .create_empty_level = NULL
163164 # The create_empty_level hyperparameter is read directly in train and predict; private$.create_empty_level stays NULL
164165 emplvls_control_ps = ps(create_empty_level = p_lgl(init = FALSE , tags = c(" train" , " predict" )))
165166 }
@@ -201,10 +202,10 @@ PipeOpImpute = R6Class("PipeOpImpute",
201202 intask = inputs [[1 ]]$ clone(deep = TRUE )
202203 pv = self $ param_set $ get_values(tags = " train" )
203204
204- # If the hyperparameter exists, we overwrite the private field here, and can simply check the private field after
205- # this without having to check conditions on both the hyperparameter and the private field
205+ # If the hyperparameter exists, then private$.create_empty_level is NULL and will be ignored
206+ create_empty_level = private $ .create_empty_level
206207 if (! is.null(pv $ create_empty_level )) {
207- private $ . create_empty_level = pv $ create_empty_level
208+ create_empty_level = pv $ create_empty_level
208209 }
209210
210211 affected_cols = (pv $ affect_columns %??% selector_all())(intask )
@@ -222,9 +223,9 @@ PipeOpImpute = R6Class("PipeOpImpute",
222223 }
223224
224225 imputanda = intask $ data(cols = affected_cols )
225- if (private $ . create_empty_level ) {
226+ if (create_empty_level ) {
226227 # Also run impute on all factor/ordered columns that don't have any NAs
227- imputanda = imputanda [, map_lgl(imputanda , function (x ) anyMissing (x ) || is.factor (x )), with = FALSE ]
228+ imputanda = imputanda [, map_lgl(imputanda , function (x ) is.factor (x ) || anyMissing (x )), with = FALSE ]
228229 } else {
229230 imputanda = imputanda [, map_lgl(imputanda , function (x ) anyMissing(x )), with = FALSE ]
230231 }
@@ -273,18 +274,21 @@ PipeOpImpute = R6Class("PipeOpImpute",
273274 context_data = intask $ data(cols = self $ state $ context_cols )
274275 }
275276
276- # If the hyperparameter exists, we overwrite the private field here, and can simply check the private field after
277- # this without having to check conditions on both the hyperparameter and the private field
277+ # If the hyperparameter exists and is set to FALSE, we do not impute factor cols that had no missings during train.
278+ # If the HP does not exist, then we always call impute, since imputing will either not add a new factor level
279+ # (empty_level_control = "never") or the new level was already added during training (empty_level_control = "always")
278280 pv = self $ param_set $ get_values(tags = " predict" )
279281 if (! is.null(pv $ create_empty_level )) {
280- private $ .create_empty_level = pv $ create_empty_level
282+ predict_all_factors = pv $ create_empty_level
283+ } else {
284+ predict_all_factors = TRUE
281285 }
282286
283287 imputanda = intask $ data(cols = self $ state $ affected_cols )
284- if (! private $ .create_empty_level ) {
288+ if (! predict_all_factors ) {
285289 # Don't run impute for factor/ordered columns that were not imputed during training
286290 imputanda = imputanda [,
287- colnames(imputanda ) %in% self $ state $ imputed_train | map_lgl(imputanda , function (x ) anyMissing (x ) && ! is.factor (x )),
291+ colnames(imputanda ) %in% self $ state $ imputed_train | map_lgl(imputanda , function (x ) ! is.factor (x ) && anyMissing (x )),
288292 with = FALSE ]
289293 } else {
290294 imputanda = imputanda [,
0 commit comments