From f60f6401c2d37371a95adea0008e42a2d3583d30 Mon Sep 17 00:00:00 2001 From: Essam Date: Mon, 1 Sep 2025 18:13:22 -0500 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20Fix=20table=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/generic.jl | 4 +-- test/generic_table_types.jl | 69 +++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 test/generic_table_types.jl diff --git a/src/generic.jl b/src/generic.jl index a283d7d..eee3897 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -59,7 +59,7 @@ function generic_fit(X, # 4. Use feature mapper to compute the mapping of each level in each column encoded_features = Symbol[]# to store column that were actually encoded for feat_name in feat_names - feat_col = Tables.getcolumn(X, feat_name) + feat_col = MMI.selectcols(X, feat_name) feat_type = elscitype(feat_col) feat_has_allowed_type = feat_type <: Union{Missing, Multiclass} || @@ -149,7 +149,7 @@ function generic_transform( new_feat_names = Symbol[] new_cols = [] for feat_name in feat_names - col = Tables.getcolumn(X, feat_name) + col = MMI.selectcols(X, feat_name) # Create the transformation function for each column if feat_name in keys(mapping_per_feat_level) if !ignore_unknown diff --git a/test/generic_table_types.jl b/test/generic_table_types.jl new file mode 100644 index 0000000..0754bca --- /dev/null +++ b/test/generic_table_types.jl @@ -0,0 +1,69 @@ +using Test +using Tables +using CategoricalArrays + + + +@testset "Generic Table Types Support" begin + + # Create test data as in the issue + x = vcat(collect("abc"), fill('d', 100)) + x = coerce(x, Multiclass) + + # Column table (NamedTuple of vectors) - this already works + coltable = (; x) + + # Row table (Vector of NamedTuples) - this was failing + rowtable = Tables.rowtable(coltable) + + # List of models that were affected by the issue + models_to_test = [ + CardinalityReducer(), + FrequencyEncoder(), + MissingnessEncoder(), + OrdinalEncoder(), + ] + + @testset "Model: $(string(typeof(model)))" for model in models_to_test + + @testset "Column Table Support" begin + mach_col = machine(model, coltable) + MLJBase.fit!(mach_col, verbosity=0) + result_col = MLJBase.transform(mach_col, coltable) + + @test !isempty(Tables.columntable(result_col)) + end + + @testset "Row Table Support" begin + # This should now work after the fix + mach_row = machine(model, rowtable) + MLJBase.fit!(mach_row, verbosity=0) + result_row = MLJBase.transform(mach_row, rowtable) + + @test !isempty(Tables.columntable(result_row)) + end + + @testset "Consistency Between Table Types" begin + # Results should be equivalent regardless of table type + mach_col = machine(model, coltable) + MLJBase.fit!(mach_col, verbosity=0) + result_col = MLJBase.transform(mach_col, coltable) + + mach_row = machine(model, rowtable) + MLJBase.fit!(mach_row, verbosity=0) + result_row = MLJBase.transform(mach_row, rowtable) + + # Convert both to column tables for comparison + result_col_ct = Tables.columntable(result_col) + result_row_ct = Tables.columntable(result_row) + + # Should have same column names + @test keys(result_col_ct) == keys(result_row_ct) + + # Should have same values (allowing for potential ordering differences in table types) + for col_name in keys(result_col_ct) + @test Set(result_col_ct[col_name]) == Set(result_row_ct[col_name]) + end + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 83c593a..4073427 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,6 +20,7 @@ _get(x) = CategoricalArrays.DataAPI.unwrap(x) include("utils.jl") include("generic.jl") +include("generic_table_types.jl") # Test for issue #42 fix include("encoders/target_encoding.jl") include("encoders/ordinal_encoding.jl") include("encoders/frequency_encoder.jl") From 615f03ed823dbd32b74831dc943532befe1adef0 Mon Sep 17 00:00:00 2001 From: Essam Date: Mon, 1 Sep 2025 18:24:43 -0500 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=91=A8=E2=80=8D=F0=9F=94=A7=20Fix=20f?= =?UTF-8?q?or=20docstrings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/MLJTransforms.jl | 13 ++++++------ .../other_transformers/continuous_encoder.jl | 8 ++++++++ .../other_transformers/fill_imputer.jl | 16 ++++++++++++++- .../interaction_transformer.jl | 8 ++++++++ .../other_transformers/metadata_shared.jl | 20 ------------------- .../other_transformers/one_hot_encoder.jl | 8 ++++++++ .../other_transformers/standardizer.jl | 8 ++++++++ .../univariate_boxcox_transformer.jl | 8 ++++++++ .../univariate_discretizer.jl | 7 +++++++ .../univariate_time_type_to_continuous.jl | 8 ++++++++ 10 files changed, 76 insertions(+), 28 deletions(-) delete mode 100644 src/transformers/other_transformers/metadata_shared.jl diff --git a/src/MLJTransforms.jl b/src/MLJTransforms.jl index 683a51a..4c2968b 100644 --- a/src/MLJTransforms.jl +++ b/src/MLJTransforms.jl @@ -5,7 +5,7 @@ using Tables # https://github.com/JuliaAI/MLJBase.jl/issues/1002 import ScientificTypes: elscitype, schema, coerce, ScientificTimeType using MLJModelInterface # exports `scitype`, which will call `ScientificTypes.scitype`, - # once MLJBase is loaded (but this is not a dependency!) +# once MLJBase is loaded (but this is not a dependency!) using CategoricalArrays using TableOperations using StatsBase @@ -29,27 +29,27 @@ include("utils.jl") include("encoders/target_encoding/errors.jl") include("encoders/target_encoding/target_encoding.jl") include("encoders/target_encoding/interface_mlj.jl") -export TargetEncoder +export TargetEncoder # Ordinal encoding include("encoders/ordinal_encoding/ordinal_encoding.jl") include("encoders/ordinal_encoding/interface_mlj.jl") -export OrdinalEncoder +export OrdinalEncoder # Frequency encoding include("encoders/frequency_encoding/frequency_encoding.jl") include("encoders/frequency_encoding/interface_mlj.jl") export frequency_encoder_fit, frequency_encoder_transform, FrequencyEncoder -export FrequencyEncoder +export FrequencyEncoder # Cardinality reduction include("transformers/cardinality_reducer/cardinality_reducer.jl") include("transformers/cardinality_reducer/interface_mlj.jl") export cardinality_reducer_fit, cardinality_reducer_transform, CardinalityReducer -export CardinalityReducer +export CardinalityReducer include("encoders/missingness_encoding/missingness_encoding.jl") include("encoders/missingness_encoding/interface_mlj.jl") -export MissingnessEncoder +export MissingnessEncoder # Contrast encoder include("encoders/contrast_encoder/contrast_encoder.jl") @@ -65,7 +65,6 @@ include("transformers/other_transformers/one_hot_encoder.jl") include("transformers/other_transformers/standardizer.jl") include("transformers/other_transformers/univariate_boxcox_transformer.jl") include("transformers/other_transformers/univariate_discretizer.jl") -include("transformers/other_transformers/metadata_shared.jl") export UnivariateDiscretizer, UnivariateStandardizer, Standardizer, UnivariateBoxCoxTransformer, diff --git a/src/transformers/other_transformers/continuous_encoder.jl b/src/transformers/other_transformers/continuous_encoder.jl index 7b232d5..f5b9b56 100644 --- a/src/transformers/other_transformers/continuous_encoder.jl +++ b/src/transformers/other_transformers/continuous_encoder.jl @@ -75,6 +75,14 @@ metadata_model(ContinuousEncoder, output_scitype = Table(Continuous), load_path = "MLJTransforms.ContinuousEncoder") +# Package metadata for docstring generation +metadata_pkg(ContinuousEncoder, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(ContinuousEncoder)) diff --git a/src/transformers/other_transformers/fill_imputer.jl b/src/transformers/other_transformers/fill_imputer.jl index dfe7bfe..183be54 100644 --- a/src/transformers/other_transformers/fill_imputer.jl +++ b/src/transformers/other_transformers/fill_imputer.jl @@ -194,6 +194,14 @@ metadata_model(FillImputer, output_scitype = Table, load_path = "MLJTransforms.FillImputer") +# Package metadata for docstring generation +metadata_pkg(UnivariateFillImputer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(UnivariateFillImputer)) @@ -294,7 +302,13 @@ For imputing tabular data, use [`FillImputer`](@ref). """ UnivariateFillImputer - +# Package metadata for docstring generation +metadata_pkg(FillImputer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") """ $(MLJModelInterface.doc_header(FillImputer)) diff --git a/src/transformers/other_transformers/interaction_transformer.jl b/src/transformers/other_transformers/interaction_transformer.jl index 61f3acd..20d36ca 100644 --- a/src/transformers/other_transformers/interaction_transformer.jl +++ b/src/transformers/other_transformers/interaction_transformer.jl @@ -40,6 +40,14 @@ metadata_model(InteractionTransformer, human_name = "interaction transformer", load_path = "MLJTransforms.InteractionTransformer") +# Package metadata for docstring generation +metadata_pkg(InteractionTransformer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(InteractionTransformer)) diff --git a/src/transformers/other_transformers/metadata_shared.jl b/src/transformers/other_transformers/metadata_shared.jl deleted file mode 100644 index 91bbfd4..0000000 --- a/src/transformers/other_transformers/metadata_shared.jl +++ /dev/null @@ -1,20 +0,0 @@ -# # METADATA FOR BUILT-IN TRANSFORMERS - -metadata_pkg.( - ( - UnivariateStandardizer, - UnivariateDiscretizer, - Standardizer, - UnivariateBoxCoxTransformer, - UnivariateFillImputer, - OneHotEncoder, - FillImputer, - ContinuousEncoder, - UnivariateTimeTypeToContinuous, - InteractionTransformer - ), - package_name = "MLJTransforms", - package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", - package_url = "https://github.com/JuliaAI/MLJTransforms.jl", - is_pure_julia = true, - package_license = "MIT") \ No newline at end of file diff --git a/src/transformers/other_transformers/one_hot_encoder.jl b/src/transformers/other_transformers/one_hot_encoder.jl index bae5b75..97363e9 100644 --- a/src/transformers/other_transformers/one_hot_encoder.jl +++ b/src/transformers/other_transformers/one_hot_encoder.jl @@ -165,6 +165,14 @@ metadata_model(OneHotEncoder, human_name = "one-hot encoder", load_path = "MLJTransforms.OneHotEncoder") +# Package metadata for docstring generation +metadata_pkg(OneHotEncoder, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(OneHotEncoder)) diff --git a/src/transformers/other_transformers/standardizer.jl b/src/transformers/other_transformers/standardizer.jl index e325096..378b637 100644 --- a/src/transformers/other_transformers/standardizer.jl +++ b/src/transformers/other_transformers/standardizer.jl @@ -236,6 +236,14 @@ metadata_model(Standardizer, output_scitype = Union{Table, AbstractVector{<:Continuous}}, load_path = "MLJTransforms.Standardizer") +# Package metadata for docstring generation +metadata_pkg(Standardizer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(Standardizer)) diff --git a/src/transformers/other_transformers/univariate_boxcox_transformer.jl b/src/transformers/other_transformers/univariate_boxcox_transformer.jl index aba6250..8f9314b 100644 --- a/src/transformers/other_transformers/univariate_boxcox_transformer.jl +++ b/src/transformers/other_transformers/univariate_boxcox_transformer.jl @@ -93,6 +93,14 @@ metadata_model(UnivariateBoxCoxTransformer, human_name = "single variable Box-Cox transformer", load_path = "MLJTransforms.UnivariateBoxCoxTransformer") +# Package metadata for docstring generation +metadata_pkg(UnivariateBoxCoxTransformer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(UnivariateBoxCoxTransformer)) diff --git a/src/transformers/other_transformers/univariate_discretizer.jl b/src/transformers/other_transformers/univariate_discretizer.jl index 669d096..87ac3a6 100644 --- a/src/transformers/other_transformers/univariate_discretizer.jl +++ b/src/transformers/other_transformers/univariate_discretizer.jl @@ -96,6 +96,13 @@ metadata_model(UnivariateDiscretizer, human_name = "single variable discretizer", load_path = "MLJTransforms.UnivariateDiscretizer") +# Package metadata for docstring generation +metadata_pkg(UnivariateDiscretizer, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") """ $(MLJModelInterface.doc_header(UnivariateDiscretizer)) diff --git a/src/transformers/other_transformers/univariate_time_type_to_continuous.jl b/src/transformers/other_transformers/univariate_time_type_to_continuous.jl index 3fb71f2..5e3a530 100644 --- a/src/transformers/other_transformers/univariate_time_type_to_continuous.jl +++ b/src/transformers/other_transformers/univariate_time_type_to_continuous.jl @@ -131,6 +131,14 @@ metadata_model(UnivariateTimeTypeToContinuous, "continuous representations of temporally typed data", load_path = "MLJTransforms.UnivariateTimeTypeToContinuous") +# Package metadata for docstring generation +metadata_pkg(UnivariateTimeTypeToContinuous, + package_name = "MLJTransforms", + package_uuid = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6", + package_url = "https://github.com/JuliaAI/MLJTransforms.jl", + is_pure_julia = true, + package_license = "MIT") + """ $(MLJModelInterface.doc_header(UnivariateTimeTypeToContinuous))