@@ -61,7 +61,7 @@ for col in [:workclass, :education, :marital_status, :occupation, :relationship,
6161end
6262
6363# Convert income to binary (0 for <=50K, 1 for >50K)
64- df. income = ifelse .(df. income .== " >50K" , 1 , 0 )
64+ df. income = ifelse .(df. income .== " >50K" , 1 , 0 );
6565
6666# Let's add a high-cardinality categorical feature to showcase encoder handling
6767# Create a realistic frequency distribution: A1-A3 make up 90% of data, A4-A500 make up 10%
@@ -75,11 +75,11 @@ n_rare = n_rows - n_frequent # 10% for A4-A500
7575frequent_samples = rand ([" A1" , " A2" , " A3" ], n_frequent)
7676
7777rare_categories = [" A$i " for i in 4 : 500 ]
78- rare_samples = rand (rare_categories, n_rare)
78+ rare_samples = rand (rare_categories, n_rare);
7979
8080# Combine and shuffle
8181all_samples = vcat (frequent_samples, rare_samples)
82- df. high_cardinality_feature = all_samples[randperm (n_rows)]
82+ df. high_cardinality_feature = all_samples[randperm (n_rows)];
8383
8484# Coerce categorical columns to appropriate scientific types.
8585# Apply explicit type coercions using fully qualified names
@@ -101,7 +101,7 @@ type_dict = Dict(
101101 :native_country => Multiclass,
102102 :high_cardinality_feature => Multiclass,
103103)
104- df = coerce (df, type_dict)
104+ df = coerce (df, type_dict);
105105
106106# Let's examine the cardinality of our categorical features:
107107categorical_cols = [:workclass , :education , :marital_status , :occupation ,
@@ -205,7 +205,7 @@ time_plot = bar(1:n, results.training_time;
205205 xrotation = 8 ,
206206 legend = false ,
207207 color = :lightblue ,
208- )
208+ );
209209
210210# accuracy plot
211211accuracy_plot = bar (1 : n, results. accuracy;
@@ -216,10 +216,10 @@ accuracy_plot = bar(1:n, results.accuracy;
216216 legend = false ,
217217 ylim = (0.0 , 1.0 ),
218218 color = :lightcoral ,
219- )
219+ );
220220
221221
222- combined_plot = plot (time_plot, accuracy_plot; layout = (1 , 2 ), size = (1200 , 500 ))
222+ combined_plot = plot (time_plot, accuracy_plot; layout = (1 , 2 ), size = (1200 , 500 ));
223223
224224# Save the plot
225225savefig (combined_plot, " adult_encoding_comparison.png" ); # hide
0 commit comments