@@ -30,7 +30,7 @@ function missingness_encoder_fit(
30
30
features:: AbstractVector{Symbol} = Symbol[];
31
31
ignore:: Bool = true ,
32
32
ordered_factor:: Bool = false ,
33
- label_for_missing:: Dict{<:Type, <:Any} = Dict (
33
+ label_for_missing:: Dict{<:Type, <:Any} = Dict (
34
34
AbstractString => " missing" ,
35
35
Char => ' m' ,
36
36
),
@@ -40,8 +40,8 @@ function missingness_encoder_fit(
40
40
41
41
# 1. Define feature mapper
42
42
function feature_mapper (col, name)
43
- col_type = nonmissingtype ( eltype ( col)) . parameters[ 1 ]
44
- feat_levels = levels (col; skipmissing = true )
43
+ feat_levels = levels ( col; skipmissing = true )
44
+ col_type = nonmissingtype ( eltype (feat_levels) )
45
45
46
46
# Ensure column type is valid (can't test because never occurs)
47
47
# Converting array elements to strings before wrapping in a `CategoricalArray`, as...
@@ -58,7 +58,7 @@ function missingness_encoder_fit(
58
58
59
59
# Check no collision between keys(label_for_missing) and feat_levels
60
60
for value in values (label_for_missing)
61
- if ! ismissing (value)
61
+ if ! ismissing (value)
62
62
if value in feat_levels
63
63
throw (ArgumentError (COLLISION_NEW_VAL_ME (value)))
64
64
end
@@ -73,7 +73,7 @@ function missingness_encoder_fit(
73
73
break
74
74
end
75
75
end
76
-
76
+
77
77
# Nonmissing levels remain as is
78
78
label_for_missing_given_feature = Dict {Missing, col_type} ()
79
79
@@ -91,7 +91,8 @@ function missingness_encoder_fit(
91
91
92
92
# 2. Pass it to generic_fit
93
93
label_for_missing_given_feature, encoded_features = generic_fit (
94
- X, features; ignore = ignore, ordered_factor = ordered_factor, feature_mapper = feature_mapper,
94
+ X, features; ignore = ignore, ordered_factor = ordered_factor,
95
+ feature_mapper = feature_mapper,
95
96
)
96
97
cache = Dict (
97
98
:label_for_missing_given_feature => label_for_missing_given_feature,
@@ -117,6 +118,11 @@ Apply a fitted missingness encoder to a table given the output of `missingness_e
117
118
"""
118
119
function missingness_encoder_transform(X, cache::Dict)
    # The fit step stores its per-feature mapping in the cache under this key.
    label_for_missing_given_feature = cache[:label_for_missing_given_feature]

    # NOTE(review): `ignore_unknown` and `ensure_categorical` are interpreted by
    # `generic_transform`, which is defined elsewhere; confirm their exact
    # semantics there before changing either flag.
    return generic_transform(
        X,
        label_for_missing_given_feature;
        ignore_unknown = true,
        ensure_categorical = true,
    )
end
122
128
0 commit comments