@@ -32,6 +32,7 @@ Pkg.activate(@__DIR__);
32
32
33
33
# Import all required packages
34
34
using MLJ
35
+ using MLJFlux
35
36
using CategoricalArrays
36
37
using DataFrames
37
38
using Optimisers
@@ -184,7 +185,7 @@ println("\nUnique rating categories: $(sort(unique(df.RatingCategory)))")
184
185
185
186
````
186
187
Distribution of categorical rating labels:
187
- OrderedCollections.OrderedDict{CategoricalArrays. CategoricalValue{String, UInt32}, Int64}("1.0" => 17, "1.5" => 18, "2.0" => 53, "2.5" => 105, "3.0" => 281, "3.5" => 722, "4.0" => 2420, "4.5" => 3542, "5.0" => 571, "NaN" => 1416)
188
+ OrderedCollections.OrderedDict{CategoricalValue{String, UInt32}, Int64}("1.0" => 17, "1.5" => 18, "2.0" => 53, "2.5" => 105, "3.0" => 281, "3.5" => 722, "4.0" => 2420, "4.5" => 3542, "5.0" => 571, "NaN" => 1416)
188
189
189
190
Unique rating categories: ["1.0", "1.5", "2.0", "2.5", "3.0", "3.5", "4.0", "4.5", "5.0", "NaN"]
190
191
@@ -207,28 +208,28 @@ df = coerce(df,
207
208
Symbol("Content Rating") => Multiclass,
208
209
:Genres => Multiclass,
209
210
Symbol("Android Ver") => Multiclass,
210
- :Rating => Continuous, # # Keep original for reference
211
- :RatingCategory => Multiclass, # # New categorical target
211
+ :Rating => Continuous, ## Keep original for reference
212
+ :RatingCategory => OrderedFactor, ## New categorical target
212
213
);
213
214
schema(df)
214
215
````
215
216
216
217
````
217
- ┌────────────────┬────────────────┬────────────────────────────────────┐
218
- │ names │ scitypes │ types │
219
- ├────────────────┼────────────────┼────────────────────────────────────┤
220
- │ Category │ Multiclass{33} │ CategoricalValue{String31, UInt32} │
221
- │ Reviews │ Continuous │ Float64 │
222
- │ Size │ Continuous │ Float64 │
223
- │ Installs │ Continuous │ Float64 │
224
- │ Type │ Multiclass{2} │ CategoricalValue{String7, UInt32} │
225
- │ Price │ Continuous │ Float64 │
226
- │ Content Rating │ Multiclass{6} │ CategoricalValue{String15, UInt32} │
227
- │ Genres │ Multiclass{48} │ CategoricalValue{String, UInt32} │
228
- │ Android Ver │ Multiclass{34} │ CategoricalValue{String31, UInt32} │
229
- │ Rating │ Continuous │ Float64 │
230
- │ RatingCategory │ Multiclass {10} │ CategoricalValue{String, UInt32} │
231
- └────────────────┴────────────────┴────────────────────────────────────┘
218
+ ┌────────────────┬─────────────────── ┬────────────────────────────────────┐
219
+ │ names │ scitypes │ types │
220
+ ├────────────────┼─────────────────── ┼────────────────────────────────────┤
221
+ │ Category │ Multiclass{33} │ CategoricalValue{String31, UInt32} │
222
+ │ Reviews │ Continuous │ Float64 │
223
+ │ Size │ Continuous │ Float64 │
224
+ │ Installs │ Continuous │ Float64 │
225
+ │ Type │ Multiclass{2} │ CategoricalValue{String7, UInt32} │
226
+ │ Price │ Continuous │ Float64 │
227
+ │ Content Rating │ Multiclass{6} │ CategoricalValue{String15, UInt32} │
228
+ │ Genres │ Multiclass{48} │ CategoricalValue{String, UInt32} │
229
+ │ Android Ver │ Multiclass{34} │ CategoricalValue{String31, UInt32} │
230
+ │ Rating │ Continuous │ Float64 │
231
+ │ RatingCategory │ OrderedFactor{10} │ CategoricalValue{String, UInt32} │
232
+ └────────────────┴─────────────────── ┴────────────────────────────────────┘
232
233
233
234
````
234
235
@@ -250,8 +251,6 @@ X = select(df, Not([:Rating, :RatingCategory])); ## Exclude both rating columns
250
251
stratify = y,
251
252
rng = Random.Xoshiro(41),
252
253
);
253
-
254
- using MLJFlux
255
254
````
256
255
257
256
## Building the EntityEmbedder Model
@@ -321,7 +320,7 @@ EntityEmbedder(
321
320
alpha = 0.0,
322
321
rng = 39,
323
322
optimiser_changes_trigger_retraining = false,
324
- acceleration = ComputationalResources. CUDALibs{Nothing}(nothing),
323
+ acceleration = CUDALibs{Nothing}(nothing),
325
324
embedding_dims = Dict{Symbol, Real}(:Category => 2, Symbol("Content Rating") => 2, Symbol("Android Ver") => 2, :Genres => 2, :Type => 2)))
326
325
````
327
326
@@ -351,8 +350,8 @@ After training, we can use the embedder as a transformer to convert categorical
351
350
352
351
````julia
353
352
# Transform the data using the learned embeddings
354
- X_train_embedded = MLJFlux . transform (mach, X_train)
355
- X_test_embedded = MLJFlux . transform (mach, X_test);
353
+ X_train_embedded = MLJ.transform(mach, X_train)
354
+ X_test_embedded = MLJ.transform(mach, X_test);
356
355
357
356
# Check the schema transformation
358
357
println("Original schema:")
@@ -389,8 +388,8 @@ MLJ.fit!(pipe_mach, verbosity = 0)
389
388
trained Machine; does not cache data
390
389
model: ProbabilisticPipeline(entity_embedder = EntityEmbedder(model = NeuralNetworkClassifier(builder = Short(n_hidden = 14, …), …)), …)
391
390
args:
392
- 1: Source @225 ⏎ ScientificTypesBase. Table{Union{AbstractVector{ScientificTypesBase. Continuous}, AbstractVector{ScientificTypesBase. Multiclass{33}}, AbstractVector{ScientificTypesBase. Multiclass{2}}, AbstractVector{ScientificTypesBase. Multiclass{6}}, AbstractVector{ScientificTypesBase. Multiclass{48}}, AbstractVector{ScientificTypesBase. Multiclass{34}}}}
393
- 2: Source @148 ⏎ AbstractVector{ScientificTypesBase.Multiclass {10}}
391
+ 1: Source @927 ⏎ Table{Union{AbstractVector{Continuous}, AbstractVector{Multiclass{33}}, AbstractVector{Multiclass{2}}, AbstractVector{Multiclass{6}}, AbstractVector{Multiclass{48}}, AbstractVector{Multiclass{34}}}}
392
+ 2: Source @044 ⏎ AbstractVector{OrderedFactor{10}}
394
393
395
394
````
396
395
0 commit comments