@@ -61,7 +61,7 @@ for col in [:workclass, :education, :marital_status, :occupation, :relationship,
61
61
end
62
62
63
63
# Convert income to binary (0 for <=50K, 1 for >50K)
64
- df. income = ifelse .(df. income .== " >50K" , 1 , 0 )
64
+ df. income = ifelse .(df. income .== " >50K" , 1 , 0 );
65
65
66
66
# Let's add a high-cardinality categorical feature to showcase encoder handling
67
67
# Create a realistic frequency distribution: A1-A3 make up 90% of data, A4-A500 make up 10%
@@ -75,11 +75,11 @@ n_rare = n_rows - n_frequent # 10% for A4-A500
75
75
frequent_samples = rand ([" A1" , " A2" , " A3" ], n_frequent)
76
76
77
77
rare_categories = [" A$i " for i in 4 : 500 ]
78
- rare_samples = rand (rare_categories, n_rare)
78
+ rare_samples = rand (rare_categories, n_rare);
79
79
80
80
# Combine and shuffle
81
81
all_samples = vcat (frequent_samples, rare_samples)
82
- df. high_cardinality_feature = all_samples[randperm (n_rows)]
82
+ df. high_cardinality_feature = all_samples[randperm (n_rows)];
83
83
84
84
# Coerce categorical columns to appropriate scientific types.
85
85
# Apply explicit type coercions using fully qualified names
@@ -101,7 +101,7 @@ type_dict = Dict(
101
101
:native_country => Multiclass,
102
102
:high_cardinality_feature => Multiclass,
103
103
)
104
- df = coerce (df, type_dict)
104
+ df = coerce (df, type_dict);
105
105
106
106
# Let's examine the cardinality of our categorical features:
107
107
categorical_cols = [:workclass , :education , :marital_status , :occupation ,
@@ -205,7 +205,7 @@ time_plot = bar(1:n, results.training_time;
205
205
xrotation = 8 ,
206
206
legend = false ,
207
207
color = :lightblue ,
208
- )
208
+ );
209
209
210
210
# accuracy plot
211
211
accuracy_plot = bar (1 : n, results. accuracy;
@@ -216,10 +216,10 @@ accuracy_plot = bar(1:n, results.accuracy;
216
216
legend = false ,
217
217
ylim = (0.0 , 1.0 ),
218
218
color = :lightcoral ,
219
- )
219
+ );
220
220
221
221
222
- combined_plot = plot (time_plot, accuracy_plot; layout = (1 , 2 ), size = (1200 , 500 ))
222
+ combined_plot = plot (time_plot, accuracy_plot; layout = (1 , 2 ), size = (1200 , 500 ));
223
223
224
224
# Save the plot
225
225
savefig (combined_plot, " adult_encoding_comparison.png" ); # hide
0 commit comments