@@ -64,7 +64,7 @@ first(df, 5)
64
64
````
65
65
66
66
``` @raw html
67
- <div><div style = "float: left;"><span>5×8 DataFrame</span></div><div style = "clear: both;"></div></div><div class = "data-frame" style = "overflow-x: scroll;"><table class = "data-frame" style = "margin-bottom: 6px;"><thead><tr class = "header"><th class = "rowNumber" style = "font-weight: bold; text-align: right;">Row</th><th style = "text-align: left;">NPreg</th><th style = "text-align: left;">Glu</th><th style = "text-align: left;">BP</th><th style = "text-align: left;">Skin</th><th style = "text-align: left;">BMI</th><th style = "text-align: left;">Ped</th><th style = "text-align: left;">Age</th><th style = "text-align: left;">Type</th></tr><tr class = "subheader headerLastRow"><th class = "rowNumber" style = "font-weight: bold; text-align: right;"></th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "CategoricalArrays.CategoricalValue{String, UInt8}" style = "text-align: left;">Cat…</th></tr></thead><tbody><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">1</td><td style = "text-align: right;">5</td><td style = "text-align: right;">10086.0</td><td style = "text-align: right;">68</td><td style = "text-align: right;">28</td><td style = "text-align: right;">30.2</td><td style = "text-align: right;">0.364</td><td style = "text-align: right;">24</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">2</td><td style = "text-align: right;">7</td><td style = "text-align: right;">10195.0</td><td style = "text-align: right;">70</td><td style = "text-align: right;">33</td><td style = "text-align: 
right;">25.1</td><td style = "text-align: right;">0.163</td><td style = "text-align: right;">55</td><td style = "text-align: left;">Yes</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">3</td><td style = "text-align: right;">5</td><td style = "text-align: right;">10077.0</td><td style = "text-align: right;">82</td><td style = "text-align: right;">41</td><td style = "text-align: right;">35.8</td><td style = "text-align: right;">0.156</td><td style = "text-align: right;">35</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">4</td><td style = "text-align: right;">0</td><td style = "text-align: right;">10165.0</td><td style = "text-align: right;">76</td><td style = "text-align: right;">43</td><td style = "text-align: right;">47.9</td><td style = "text-align: right;">0.259</td><td style = "text-align: right;">26</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">5</td><td style = "text-align: right;">0</td><td style = "text-align: right;">10107.0</td><td style = "text-align: right;">60</td><td style = "text-align: right;">25</td><td style = "text-align: right;">26.4</td><td style = "text-align: right;">0.133</td><td style = "text-align: right;">23</td><td style = "text-align: left;">No</td></tr></tbody></table></div>
67
+ <div><div style = "float: left;"><span>5×8 DataFrame</span></div><div style = "clear: both;"></div></div><div class = "data-frame" style = "overflow-x: scroll;"><table class = "data-frame" style = "margin-bottom: 6px;"><thead><tr class = "header"><th class = "rowNumber" style = "font-weight: bold; text-align: right;">Row</th><th style = "text-align: left;">NPreg</th><th style = "text-align: left;">Glu</th><th style = "text-align: left;">BP</th><th style = "text-align: left;">Skin</th><th style = "text-align: left;">BMI</th><th style = "text-align: left;">Ped</th><th style = "text-align: left;">Age</th><th style = "text-align: left;">Type</th></tr><tr class = "subheader headerLastRow"><th class = "rowNumber" style = "font-weight: bold; text-align: right;"></th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Float64" style = "text-align: left;">Float64</th><th title = "Int32" style = "text-align: left;">Int32</th><th title = "CategoricalValue{String, UInt8}" style = "text-align: left;">Cat…</th></tr></thead><tbody><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">1</td><td style = "text-align: right;">5</td><td style = "text-align: right;">10086.0</td><td style = "text-align: right;">68</td><td style = "text-align: right;">28</td><td style = "text-align: right;">30.2</td><td style = "text-align: right;">0.364</td><td style = "text-align: right;">24</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">2</td><td style = "text-align: right;">7</td><td style = "text-align: right;">10195.0</td><td style = "text-align: right;">70</td><td style = "text-align: right;">33</td><td style = "text-align: right;">25.1</td><td 
style = "text-align: right;">0.163</td><td style = "text-align: right;">55</td><td style = "text-align: left;">Yes</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">3</td><td style = "text-align: right;">5</td><td style = "text-align: right;">10077.0</td><td style = "text-align: right;">82</td><td style = "text-align: right;">41</td><td style = "text-align: right;">35.8</td><td style = "text-align: right;">0.156</td><td style = "text-align: right;">35</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">4</td><td style = "text-align: right;">0</td><td style = "text-align: right;">10165.0</td><td style = "text-align: right;">76</td><td style = "text-align: right;">43</td><td style = "text-align: right;">47.9</td><td style = "text-align: right;">0.259</td><td style = "text-align: right;">26</td><td style = "text-align: left;">No</td></tr><tr><td class = "rowNumber" style = "font-weight: bold; text-align: right;">5</td><td style = "text-align: right;">0</td><td style = "text-align: right;">10107.0</td><td style = "text-align: right;">60</td><td style = "text-align: right;">25</td><td style = "text-align: right;">26.4</td><td style = "text-align: right;">0.133</td><td style = "text-align: right;">23</td><td style = "text-align: left;">No</td></tr></tbody></table></div>
68
68
```
69
69
70
70
### Data Type Conversion
@@ -151,9 +151,9 @@ stand = Standardizer() # This is our standardization transformer
151
151
152
152
# Create pipelines for each model variant
153
153
logreg_pipe = logreg () # Plain logistic regression
154
- logreg_std_pipe = Pipeline ( stand, logreg () ) # Logistic regression with standardization
154
+ logreg_std_pipe = stand |> logreg () # Logistic regression with standardization
155
155
svm_pipe = svm () # Plain SVM
156
- svm_std_pipe = Pipeline ( stand, svm () ) # SVM with standardization
156
+ svm_std_pipe = stand |> svm () # SVM with standardization
157
157
````
158
158
159
159
````
@@ -191,7 +191,7 @@ models = [
191
191
````
192
192
193
193
````
194
- 4-element Vector{Tuple{String, MLJModelInterface. Supervised}}:
194
+ 4-element Vector{Tuple{String, Supervised}}:
195
195
("Logistic Regression", LogisticClassifier(lambda = 2.220446049250313e-16, …))
196
196
("Logistic Regression (standardized)", ProbabilisticPipeline(standardizer = Standardizer(features = Symbol[], …), …))
197
197
("SVM", SVC(kernel = RadialBasis, …))
@@ -201,6 +201,10 @@ models = [
201
201
Now we'll loop through each model, train it, make predictions, and calculate accuracy.
202
202
This will help us compare how standardization affects each model's performance.
203
203
204
+ Note: As an alternative to the explicit `fit!`/`predict` workflow below, we could use:
205
+ `evaluate(model, X, y, resampling=[(train, test),], measure=accuracy)`
206
+ This shortcut handles the training, prediction, and evaluation in one step.
207
+
204
208
```` julia
205
209
# Train and evaluate each model
206
210
results = DataFrame (model = String[], accuracy = Float64[])
@@ -243,10 +247,10 @@ end
243
247
│
244
248
│ In the present case:
245
249
│
246
- │ scitype(data) = Tuple{ScientificTypesBase. Table{Union{AbstractVector{ScientificTypesBase. Continuous}, AbstractVector{ScientificTypesBase. Count}}}, AbstractVector{ScientificTypesBase. Multiclass{2}}}
250
+ │ scitype(data) = Tuple{Table{Union{AbstractVector{Continuous}, AbstractVector{Count}}}, AbstractVector{Multiclass{2}}}
247
251
│
248
- │ fit_data_scitype(model) = Tuple{ScientificTypesBase. Table{<:AbstractVector{<:ScientificTypesBase. Continuous}}, AbstractVector{<:ScientificTypesBase. Finite}}
249
- └ @ MLJBase ~/.julia/packages/MLJBase/F1Eh6 /src/machines.jl:237
252
+ │ fit_data_scitype(model) = Tuple{Table{<:AbstractVector{<:Continuous}}, AbstractVector{<:Finite}}
253
+ └ @ MLJBase ~/.julia/packages/MLJBase/7nGJF/src/machines.jl:237
250
254
[ Info: Training machine(LogisticClassifier(lambda = 2.220446049250313e-16, …), …).
251
255
┌ Info: Solver: MLJLinearModels.LBFGS{Optim.Options{Float64, Nothing}, @NamedTuple{}}
252
256
│ optim_options: Optim.Options{Float64, Nothing}
@@ -273,10 +277,10 @@ end
273
277
│
274
278
│ In the present case:
275
279
│
276
- │ scitype(data) = Tuple{ScientificTypesBase. Table{Union{AbstractVector{ScientificTypesBase. Continuous}, AbstractVector{ScientificTypesBase. Count}}}, AbstractVector{ScientificTypesBase. Multiclass{2}}}
280
+ │ scitype(data) = Tuple{Table{Union{AbstractVector{Continuous}, AbstractVector{Count}}}, AbstractVector{Multiclass{2}}}
277
281
│
278
- │ fit_data_scitype(model) = Union{Tuple{ScientificTypesBase. Table{<:AbstractVector{<:ScientificTypesBase. Continuous}}, AbstractVector{<:ScientificTypesBase. Finite}}, Tuple{ScientificTypesBase. Table{<:AbstractVector{<:ScientificTypesBase. Continuous}}, AbstractVector{<:ScientificTypesBase. Finite}, Any}}
279
- └ @ MLJBase ~/.julia/packages/MLJBase/F1Eh6 /src/machines.jl:237
282
+ │ fit_data_scitype(model) = Union{Tuple{Table{<:AbstractVector{<:Continuous}}, AbstractVector{<:Finite}}, Tuple{Table{<:AbstractVector{<:Continuous}}, AbstractVector{<:Finite}, Any}}
283
+ └ @ MLJBase ~/.julia/packages/MLJBase/7nGJF/src/machines.jl:237
280
284
[ Info: Training machine(SVC(kernel = RadialBasis, …), …).
281
285
[ Info: Training machine(DeterministicPipeline(standardizer = Standardizer(features = Symbol[], …), …), …).
282
286
[ Info: Training machine(:standardizer, …).
0 commit comments