✨ MLJ Update: drop the direct MLJTransforms dependency from the tutorials and use the encoders via MLJ

EssamWisam · EssamWisam · commit 99ccadc994b8 · 2025-09-23T20:06:03.000-05:00
diff --git a/docs/src/tutorials/adult_example/Project.toml b/docs/src/tutorials/adult_example/Project.toml
@@ -12,7 +12,6 @@ MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
 MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
 MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
 NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
 PalmerPenguins = "8b842266-38fa-440a-9b57-31493939ab85"
diff --git a/docs/src/tutorials/adult_example/adult_encoding_comparison.png b/docs/src/tutorials/adult_example/adult_encoding_comparison.png
diff --git a/docs/src/tutorials/adult_example/notebook.jl b/docs/src/tutorials/adult_example/notebook.jl
@@ -16,9 +16,11 @@
 # demonstrate how encoders handle extreme cardinality - a common real-world scenario with
 # features like customer IDs, product codes, or geographical subdivisions.
 
-# packages are already activated by generate.jl
+using Pkg;
+Pkg.activate(@__DIR__);
+Pkg.instantiate(); #src
 
-using MLJ, MLJTransforms, DataFrames, ScientificTypes
+using MLJ, DataFrames, ScientificTypes
 using Random, CSV, StatsBase, Plots, BenchmarkTools
 
 # Import scitypes from MLJ to avoid any package version skew
@@ -123,7 +125,6 @@ train, test = partition(eachindex(y), 0.8, shuffle = true, rng = 100);
 # ## Setup Encoders and Model
 # Load the required models and create different encoding strategies:
 
-OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
 CatBoostClassifier = @load CatBoostClassifier pkg = CatBoost
 
 
@@ -140,8 +141,8 @@ card_reducer = MLJTransforms.CardinalityReducer(
         Char => 'O',
     ),
 )
-onehot_model = OneHot(drop_last = true, ordered_factor = true)
-freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
+onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
+freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
 cat = CatBoostClassifier();
 
 # Create three different pipelines to compare:
diff --git a/docs/src/tutorials/adult_example/notebook.md b/docs/src/tutorials/adult_example/notebook.md
diff --git a/docs/src/tutorials/classic_comparison/Project.toml b/docs/src/tutorials/classic_comparison/Project.toml
@@ -6,11 +6,11 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
 LightGBM = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
 MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
 MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
 NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
 PalmerPenguins = "8b842266-38fa-440a-9b57-31493939ab85"
diff --git a/docs/src/tutorials/classic_comparison/notebook.jl b/docs/src/tutorials/classic_comparison/notebook.jl
@@ -13,7 +13,7 @@ using Pkg;
 Pkg.activate(@__DIR__);
 Pkg.instantiate(); #src
 
-using MLJ, MLJTransforms, LIBSVM, DataFrames, ScientificTypes
+using MLJ, LIBSVM, DataFrames, ScientificTypes
 using Random, CSV, Plots
 
 # ## Load and Prepare Data
@@ -33,7 +33,7 @@ ScientificTypes.schema(df)
 # Automatically coerce columns with few unique values to categorical:
 df = coerce(df, autotype(df, :few_to_finite))
 
-ScientificTypes.schema(df)
+schema(df)
 
 # ## Split Data
 # Separate features from target and create train/test split:
@@ -43,7 +43,6 @@ train, test = partition(eachindex(y), 0.9, shuffle = true, rng = 100);
 # ## Setup Encoders and Classifier
 # Load the required models and create different encoding strategies:
 
-OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
 SVC = @load SVC pkg = LIBSVM verbosity = 0
 
 # **Encoding Strategies Explained:**
@@ -52,10 +51,10 @@ SVC = @load SVC pkg = LIBSVM verbosity = 0
 # 3. **Target**: Uses target statistics for each category 
 # 4. **Ordinal**: Assigns integer codes to categories (assumes ordering)
 
-onehot_model = OneHot(drop_last = true, ordered_factor = true)
-freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
-target_model = MLJTransforms.TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
-ordinal_model = MLJTransforms.OrdinalEncoder(ordered_factor = true)
+onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
+freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
+target_model = TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
+ordinal_model = OrdinalEncoder(ordered_factor = true)
 svm = SVC()
 
 # Create four different pipelines to compare:
diff --git a/docs/src/tutorials/classic_comparison/notebook.md b/docs/src/tutorials/classic_comparison/notebook.md
@@ -16,7 +16,7 @@ OneHot, Frequency, Target, and Ordinal encoders paired with SVM classification.
 using Pkg;
 Pkg.activate(@__DIR__);
 
-using MLJ, MLJTransforms, LIBSVM, DataFrames, ScientificTypes
+using MLJ, LIBSVM, DataFrames, ScientificTypes
 using Random, CSV, Plots
 ````
 
@@ -65,7 +65,7 @@ Automatically coerce columns with few unique values to categorical:
 ````julia
 df = coerce(df, autotype(df, :few_to_finite))
 
-ScientificTypes.schema(df)
+schema(df)
 ````
 
 ````
@@ -96,7 +96,6 @@ train, test = partition(eachindex(y), 0.9, shuffle = true, rng = 100);
 Load the required models and create different encoding strategies:
 
 ````julia
-OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
 SVC = @load SVC pkg = LIBSVM verbosity = 0
 ````
 
@@ -111,10 +110,10 @@ MLJLIBSVMInterface.SVC
 4. **Ordinal**: Assigns integer codes to categories (assumes ordering)
 
 ````julia
-onehot_model = OneHot(drop_last = true, ordered_factor = true)
-freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
-target_model = MLJTransforms.TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
-ordinal_model = MLJTransforms.OrdinalEncoder(ordered_factor = true)
+onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
+freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
+target_model = TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
+ordinal_model = OrdinalEncoder(ordered_factor = true)
 svm = SVC()
 ````
 
diff --git a/docs/src/tutorials/standardization/Project.toml b/docs/src/tutorials/standardization/Project.toml
@@ -4,6 +4,5 @@ LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"