|
36 | 36 |
|
37 | 37 | static int threads = -1; |
38 | 38 | static int batch_size = 32; |
39 | | -static int epoch_count = 5; |
| 39 | +static int epoch_count = 15; |
40 | 40 | static int export_onnx = 0; |
41 | 41 |
|
42 | 42 | //---------------------------------- |
@@ -214,10 +214,10 @@ int main(int argc, char *argv[]) |
214 | 214 | // make a new network |
215 | 215 | PNetwork pnet = ann_make_network(OPT_ADAM, LOSS_CATEGORICAL_CROSS_ENTROPY); |
216 | 216 |
|
217 | | - // define our network |
| 217 | + // define our network - deeper architecture with ReLU for better accuracy |
218 | 218 | ann_add_layer(pnet, 784, LAYER_INPUT, ACTIVATION_NULL); |
219 | | - ann_add_layer(pnet, 32, LAYER_HIDDEN, ACTIVATION_SIGMOID); |
220 | | -// ann_add_layer(pnet, 128, LAYER_HIDDEN, ACTIVATION_RELU); |
| 219 | + ann_add_layer(pnet, 128, LAYER_HIDDEN, ACTIVATION_RELU); |
| 220 | + ann_add_layer(pnet, 64, LAYER_HIDDEN, ACTIVATION_RELU); |
221 | 221 | ann_add_layer(pnet, 10, LAYER_OUTPUT, ACTIVATION_SOFTMAX); |
222 | 222 |
|
223 | 223 | real *data = NULL, *test_data = NULL; |
@@ -266,6 +266,8 @@ int main(int argc, char *argv[]) |
266 | 266 | ann_set_epoch_limit(pnet, epoch_count); |
267 | 267 | ann_set_convergence(pnet, (real)0.1); |
268 | 268 | ann_set_batch_size(pnet, batch_size); |
| 269 | + ann_set_dropout(pnet, 0.2f); // 20% dropout on hidden layers |
| 270 | + ann_set_gradient_clip(pnet, 5.0f); // Clip gradients for stability |
269 | 271 |
|
270 | 272 | // Add exponential LR decay (5% reduction per epoch) |
271 | 273 | static real lr_decay = 0.95f; |
|
0 commit comments