Skip to content

Commit 99ccadc

Browse files
committed
✨ MLJ Update
1 parent c283429 commit 99ccadc

File tree

8 files changed

+36
-34
lines changed

8 files changed

+36
-34
lines changed

docs/src/tutorials/adult_example/Project.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
1212
MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
1313
MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
1414
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
15-
MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
1615
MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
1716
NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
1817
PalmerPenguins = "8b842266-38fa-440a-9b57-31493939ab85"
-1.12 KB
Loading

docs/src/tutorials/adult_example/notebook.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
# demonstrate how encoders handle extreme cardinality - a common real-world scenario with
1717
# features like customer IDs, product codes, or geographical subdivisions.
1818

19-
# packages are already activated by generate.jl
19+
using Pkg;
20+
Pkg.activate(@__DIR__);
21+
Pkg.instantiate(); #src
2022

21-
using MLJ, MLJTransforms, DataFrames, ScientificTypes
23+
using MLJ, DataFrames, ScientificTypes
2224
using Random, CSV, StatsBase, Plots, BenchmarkTools
2325

2426
# Import scitypes from MLJ to avoid any package version skew
@@ -123,7 +125,6 @@ train, test = partition(eachindex(y), 0.8, shuffle = true, rng = 100);
123125
# ## Setup Encoders and Model
124126
# Load the required models and create different encoding strategies:
125127

126-
OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
127128
CatBoostClassifier = @load CatBoostClassifier pkg = CatBoost
128129

129130

@@ -140,8 +141,8 @@ card_reducer = MLJTransforms.CardinalityReducer(
140141
Char => 'O',
141142
),
142143
)
143-
onehot_model = OneHot(drop_last = true, ordered_factor = true)
144-
freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
144+
onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
145+
freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
145146
cat = CatBoostClassifier();
146147

147148
# Create three different pipelines to compare:

docs/src/tutorials/adult_example/notebook.md

Lines changed: 17 additions & 12 deletions
Large diffs are not rendered by default.

docs/src/tutorials/classic_comparison/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
66
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
77
LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
88
LightGBM = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a"
9+
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
910
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
1011
MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
1112
MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
1213
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
13-
MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
1414
MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
1515
NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
1616
PalmerPenguins = "8b842266-38fa-440a-9b57-31493939ab85"

docs/src/tutorials/classic_comparison/notebook.jl

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ using Pkg;
1313
Pkg.activate(@__DIR__);
1414
Pkg.instantiate(); #src
1515

16-
using MLJ, MLJTransforms, LIBSVM, DataFrames, ScientificTypes
16+
using MLJ, LIBSVM, DataFrames, ScientificTypes
1717
using Random, CSV, Plots
1818

1919
# ## Load and Prepare Data
@@ -33,7 +33,7 @@ ScientificTypes.schema(df)
3333
# Automatically coerce columns with few unique values to categorical:
3434
df = coerce(df, autotype(df, :few_to_finite))
3535

36-
ScientificTypes.schema(df)
36+
schema(df)
3737

3838
# ## Split Data
3939
# Separate features from target and create train/test split:
@@ -43,7 +43,6 @@ train, test = partition(eachindex(y), 0.9, shuffle = true, rng = 100);
4343
# ## Setup Encoders and Classifier
4444
# Load the required models and create different encoding strategies:
4545

46-
OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
4746
SVC = @load SVC pkg = LIBSVM verbosity = 0
4847

4948
# **Encoding Strategies Explained:**
@@ -52,10 +51,10 @@ SVC = @load SVC pkg = LIBSVM verbosity = 0
5251
# 3. **Target**: Uses target statistics for each category
5352
# 4. **Ordinal**: Assigns integer codes to categories (assumes ordering)
5453

55-
onehot_model = OneHot(drop_last = true, ordered_factor = true)
56-
freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
57-
target_model = MLJTransforms.TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
58-
ordinal_model = MLJTransforms.OrdinalEncoder(ordered_factor = true)
54+
onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
55+
freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
56+
target_model = TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
57+
ordinal_model = OrdinalEncoder(ordered_factor = true)
5958
svm = SVC()
6059

6160
# Create four different pipelines to compare:

docs/src/tutorials/classic_comparison/notebook.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ OneHot, Frequency, Target, and Ordinal encoders paired with SVM classification.
1616
using Pkg;
1717
Pkg.activate(@__DIR__);
1818

19-
using MLJ, MLJTransforms, LIBSVM, DataFrames, ScientificTypes
19+
using MLJ, LIBSVM, DataFrames, ScientificTypes
2020
using Random, CSV, Plots
2121
````
2222

@@ -65,7 +65,7 @@ Automatically coerce columns with few unique values to categorical:
6565
````julia
6666
df = coerce(df, autotype(df, :few_to_finite))
6767

68-
ScientificTypes.schema(df)
68+
schema(df)
6969
````
7070

7171
````
@@ -96,7 +96,6 @@ train, test = partition(eachindex(y), 0.9, shuffle = true, rng = 100);
9696
Load the required models and create different encoding strategies:
9797

9898
````julia
99-
OneHot = @load OneHotEncoder pkg = MLJModels verbosity = 0
10099
SVC = @load SVC pkg = LIBSVM verbosity = 0
101100
````
102101

@@ -111,10 +110,10 @@ MLJLIBSVMInterface.SVC
111110
4. **Ordinal**: Assigns integer codes to categories (assumes ordering)
112111

113112
````julia
114-
onehot_model = OneHot(drop_last = true, ordered_factor = true)
115-
freq_model = MLJTransforms.FrequencyEncoder(normalize = false, ordered_factor = true)
116-
target_model = MLJTransforms.TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
117-
ordinal_model = MLJTransforms.OrdinalEncoder(ordered_factor = true)
113+
onehot_model = OneHotEncoder(drop_last = true, ordered_factor = true)
114+
freq_model = FrequencyEncoder(normalize = false, ordered_factor = true)
115+
target_model = TargetEncoder(lambda = 0.9, m = 5, ordered_factor = true)
116+
ordinal_model = OrdinalEncoder(ordered_factor = true)
118117
svm = SVC()
119118
````
120119

docs/src/tutorials/standardization/Project.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,5 @@ LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
44
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
55
MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
66
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
7-
MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
87
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
98
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"

0 commit comments

Comments
 (0)