Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions src/encoders/contrast_encoder/contrast_encoder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,9 @@ function contrast_encoder_fit(
X, features; ignore = ignore, ordered_factor = ordered_factor,
feature_mapper = feature_mapper,
)

cache = Dict(
:vector_given_value_given_feature => vector_given_value_given_feature,
:encoded_features => encoded_features,
cache = (
vector_given_value_given_feature = vector_given_value_given_feature,
encoded_features = encoded_features,
)

return cache
Expand All @@ -157,7 +156,7 @@ Use a fitted contrast encoder to encode the levels of selected categorical varia

- `X_tr`: The table with selected features after the selected features are encoded by contrast encoding.
"""
function contrast_encoder_transform(X, cache::NamedTuple)
    # `cache` is a named tuple carrying `vector_given_value_given_feature`
    # (e.g. the value returned by `contrast_encoder_fit`); only that field is read.
    vector_given_value_given_feature = cache.vector_given_value_given_feature

    # All options are passed as keywords after `;` — same values as before.
    return generic_transform(
        X,
        vector_given_value_given_feature;
        single_feat = false,
        use_levelnames = true,
    )
end
9 changes: 4 additions & 5 deletions src/encoders/contrast_encoder/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,18 @@ function MMI.fit(transformer::ContrastEncoder, verbosity::Int, X)
buildmatrix = transformer.buildmatrix,
ordered_factor = transformer.ordered_factor,
)
fitresult = generic_cache[:vector_given_value_given_feature]
fitresult = generic_cache.vector_given_value_given_feature

report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 6. Transform method
function MMI.transform(transformer::ContrastEncoder, fitresult, Xnew)
generic_cache = Dict(
:vector_given_value_given_feature =>
fitresult,
generic_cache = (
vector_given_value_given_feature = fitresult,
)
Xnew_transf = contrast_encoder_transform(Xnew, generic_cache)
return Xnew_transf
Expand Down
14 changes: 7 additions & 7 deletions src/encoders/frequency_encoding/frequency_encoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ function frequency_encoder_fit(
# 2. Pass it to generic_fit
statistic_given_feat_val, encoded_features = generic_fit(
X, features; ignore = ignore, ordered_factor = ordered_factor,
feature_mapper = feature_mapper,
)
cache = Dict(
:statistic_given_feat_val => statistic_given_feat_val,
:encoded_features => encoded_features,
feature_mapper = feature_mapper,)
cache = (
statistic_given_feat_val = statistic_given_feat_val,
encoded_features = encoded_features,
)
return cache
end
Expand All @@ -62,7 +62,7 @@ Encode the levels of a categorical variable in a given table with their (normali

- `X_tr`: The table with selected features after the selected features are encoded by frequency encoding.
"""
function frequency_encoder_transform(X, cache::NamedTuple)
    # `cache` is a named tuple carrying `statistic_given_feat_val`
    # (e.g. the value returned by `frequency_encoder_fit`); only that field is read.
    statistic_given_feat_val = cache.statistic_given_feat_val
    return generic_transform(X, statistic_given_feat_val)
end
9 changes: 4 additions & 5 deletions src/encoders/frequency_encoding/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,18 @@ function MMI.fit(transformer::FrequencyEncoder, verbosity::Int, X)
normalize = transformer.normalize,
output_type = transformer.output_type,
)
fitresult = generic_cache[:statistic_given_feat_val]
fitresult = generic_cache.statistic_given_feat_val

report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 6. Transform method
function MMI.transform(transformer::FrequencyEncoder, fitresult, Xnew)
generic_cache = Dict(
:statistic_given_feat_val =>
fitresult,
generic_cache = (
statistic_given_feat_val = fitresult,
)
Xnew_transf = frequency_encoder_transform(Xnew, generic_cache)
return Xnew_transf
Expand Down
9 changes: 4 additions & 5 deletions src/encoders/missingness_encoding/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,18 @@ function MMI.fit(transformer::MissingnessEncoder, verbosity::Int, X)
ordered_factor = transformer.ordered_factor,
label_for_missing = transformer.label_for_missing,
)
fitresult = generic_cache[:label_for_missing_given_feature]
fitresult = generic_cache.label_for_missing_given_feature

report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 6. Transform method
function MMI.transform(transformer::MissingnessEncoder, fitresult, Xnew)
generic_cache = Dict(
:label_for_missing_given_feature =>
fitresult,
generic_cache = (
label_for_missing_given_feature = fitresult,
)
Xnew_transf = missingness_encoder_transform(Xnew, generic_cache)
return Xnew_transf
Expand Down
10 changes: 5 additions & 5 deletions src/encoders/missingness_encoding/missingness_encoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ function missingness_encoder_fit(
X, features; ignore = ignore, ordered_factor = ordered_factor,
feature_mapper = feature_mapper,
)
cache = Dict(
:label_for_missing_given_feature => label_for_missing_given_feature,
:encoded_features => encoded_features,
cache = (
label_for_missing_given_feature = label_for_missing_given_feature,
encoded_features = encoded_features,
)
return cache
end
Expand All @@ -116,8 +116,8 @@ Apply a fitted missingness encoder to a table given the output of `missingness_e

- `X_tr`: The table with selected features after the selected features are transformed by missingness encoder
"""
function missingness_encoder_transform(X, cache::Dict)
label_for_missing_given_feature = cache[:label_for_missing_given_feature]
function missingness_encoder_transform(X, cache::NamedTuple)
label_for_missing_given_feature = cache.label_for_missing_given_feature
return generic_transform(
X,
label_for_missing_given_feature;
Expand Down
8 changes: 3 additions & 5 deletions src/encoders/ordinal_encoding/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,16 @@ function MMI.fit(transformer::OrdinalEncoder, verbosity::Int, X)
output_type = transformer.output_type,
)
fitresult =
generic_cache[:index_given_feat_level]
report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
generic_cache.index_given_feat_level
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 6. Transform method
function MMI.transform(transformer::OrdinalEncoder, fitresult, Xnew)
    # `ordinal_encoder_transform` reads `cache.index_given_feat_level`, so wrap the
    # fit result in a one-field named tuple (the trailing comma makes it a tuple).
    generic_cache = (index_given_feat_level = fitresult,)
    Xnew_transf = ordinal_encoder_transform(Xnew, generic_cache)
    return Xnew_transf
end
Expand Down
18 changes: 10 additions & 8 deletions src/encoders/ordinal_encoding/ordinal_encoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Fit an encoder to encode the levels of categorical variables in a given table as
- `ignore=true`: Whether to exclude or include the features given in `features`
- `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
- `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.

# Returns (in a named tuple)

- `index_given_feat_level`: Maps each level for each column in a subset of the categorical features of X into an integer.
Expand All @@ -27,18 +28,19 @@ function ordinal_encoder_fit(
function feature_mapper(col, name)
    # Map every level of `col` to its position in `levels(col)`, stored as
    # `output_type` (taken from the enclosing scope).
    # `name` is not used here; presumably it is part of the callback signature
    # expected by `generic_fit` — confirm against that function.
    feat_levels = levels(col)
    index_given_feat_val = Dict{eltype(feat_levels), output_type}(
        value => index for (index, value) in enumerate(feat_levels)
    )
    return index_given_feat_val
end

# 2. Pass it to generic_fit
index_given_feat_level, encoded_features = generic_fit(
X, features; ignore = ignore, ordered_factor = ordered_factor,
feature_mapper = feature_mapper,
)
cache = Dict(
:index_given_feat_level => index_given_feat_level,
:encoded_features => encoded_features,
feature_mapper = feature_mapper,)
cache = (
index_given_feat_level = index_given_feat_level,
encoded_features = encoded_features,
)
return cache
end
Expand All @@ -58,7 +60,7 @@ Encode the levels of a categorical variable in a given table as integers.

- `X_tr`: The table with selected features after the selected features are encoded by ordinal encoding.
"""
function ordinal_encoder_transform(X, cache::NamedTuple)
    # `cache` is a named tuple carrying `index_given_feat_level`
    # (e.g. the value returned by `ordinal_encoder_fit`); only that field is read.
    index_given_feat_level = cache.index_given_feat_level
    return generic_transform(X, index_given_feat_level)
end
23 changes: 11 additions & 12 deletions src/encoders/target_encoding/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct TargetEncoderResult{
task::S # "Regression", "Classification"
num_classes::I # num_classes in case of classification
y_classes::A # y_classes in case of classification

end


Expand All @@ -75,25 +75,24 @@ function MMI.fit(transformer::TargetEncoder, verbosity::Int, X, y)
m = transformer.m,
)
fitresult = TargetEncoderResult(
generic_cache[:y_stat_given_feat_level],
generic_cache[:task],
generic_cache[:num_classes],
generic_cache[:y_classes],
generic_cache.y_stat_given_feat_level,
generic_cache.task,
generic_cache.num_classes,
generic_cache.y_classes,
)
report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 7. Transform method
function MMI.transform(transformer::TargetEncoder, fitresult, Xnew)
generic_cache = Dict(
:y_stat_given_feat_level =>
fitresult.y_stat_given_feat_level,
:num_classes => fitresult.num_classes,
:task => fitresult.task,
:y_classes => fitresult.y_classes,
generic_cache = (
y_stat_given_feat_level = fitresult.y_stat_given_feat_level,
num_classes = fitresult.num_classes,
task = fitresult.task,
y_classes = fitresult.y_classes,
)
Xnew_transf = target_encoder_transform(Xnew, generic_cache)
return Xnew_transf
Expand Down
22 changes: 11 additions & 11 deletions src/encoders/target_encoding/target_encoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,12 @@ function target_encoder_fit(
feature_mapper = feature_mapper,
)

cache = Dict(
:task => task,
:num_classes => (task == "Regression") ? -1 : length(y_classes),
:y_stat_given_feat_level => y_stat_given_feat_level,
:encoded_features => encoded_features,
:y_classes => (task == "Regression") ? nothing : y_classes,
cache = (
task = task,
num_classes = (task == "Regression") ? -1 : length(y_classes),
y_stat_given_feat_level = y_stat_given_feat_level,
encoded_features = encoded_features,
y_classes = (task == "Regression") ? nothing : y_classes,
)
return cache
end
Expand All @@ -242,16 +242,16 @@ every categorical feature as well as other metadata needed for transform
"""

function target_encoder_transform(X, cache)
    # Read each field once via property access; `cache` is expected to expose
    # `task`, `y_stat_given_feat_level`, `num_classes` and `y_classes`
    # (the named tuple built by `target_encoder_fit` has these fields).
    task = cache.task
    y_stat_given_feat_level = cache.y_stat_given_feat_level
    num_classes = cache.num_classes
    y_classes = cache.y_classes

    # `single_feat` is true for regression, or for classification with fewer
    # than 3 classes.
    return generic_transform(
        X,
        y_stat_given_feat_level;
        single_feat = task == "Regression" || (task == "Classification" && num_classes < 3),
        use_levelnames = true,
        custom_levels = y_classes,
    )
end

17 changes: 11 additions & 6 deletions src/transformers/cardinality_reducer/cardinality_reducer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ function cardinality_reducer_fit(
X, features; ignore = ignore, ordered_factor = ordered_factor,
feature_mapper = feature_mapper,
)
cache = Dict(
:new_cat_given_col_val => new_cat_given_col_val,
:encoded_features => encoded_features,
cache = (
new_cat_given_col_val = new_cat_given_col_val,
encoded_features = encoded_features,
)
return cache
end
Expand All @@ -128,7 +128,12 @@ Apply a fitted cardinality reducer to a table given the output of `cardinality_r

- `X_tr`: The table with selected features after the selected features are transformed by cardinality reducer
"""
function cardinality_reducer_transform(X, cache::NamedTuple)
    # `cache` is a named tuple carrying `new_cat_given_col_val`
    # (e.g. the value returned by `cardinality_reducer_fit`); only that field is read.
    new_cat_given_col_val = cache.new_cat_given_col_val
    return generic_transform(
        X,
        new_cat_given_col_val;
        ignore_unknown = true,
        ensure_categorical = true,
    )
end
17 changes: 11 additions & 6 deletions src/transformers/cardinality_reducer/interface_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ function CardinalityReducer(;
Char => 'O',
),
)
return CardinalityReducer(features, ignore, ordered_factor, min_frequency, label_for_infrequent)
return CardinalityReducer(
features,
ignore,
ordered_factor,
min_frequency,
label_for_infrequent,
)
end;


Expand All @@ -43,19 +49,18 @@ function MMI.fit(transformer::CardinalityReducer, verbosity::Int, X)
min_frequency = transformer.min_frequency,
label_for_infrequent = transformer.label_for_infrequent,
)
fitresult = generic_cache[:new_cat_given_col_val]
fitresult = generic_cache.new_cat_given_col_val

report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
report = (encoded_features = generic_cache.encoded_features,) # report only has list of encoded features
cache = nothing
return fitresult, cache, report
end;


# 6. Transform method
function MMI.transform(transformer::CardinalityReducer, fitresult, Xnew)
generic_cache = Dict(
:new_cat_given_col_val =>
fitresult,
generic_cache = (
new_cat_given_col_val = fitresult,
)
Xnew_transf = cardinality_reducer_transform(Xnew, generic_cache)
return Xnew_transf
Expand Down
Loading