@@ -44,3 +44,151 @@ ResNetContractConfig { layers: [3, 4, 23, 3], num_classes: 1000, stem_width: 64,
ResNetContractConfig { layers: [3, 8, 36, 3], num_classes: 1000, stem_width: 64, output_stride: 32, bottleneck_policy: Some(BottleneckPolicyConfig { pinch_factor: 4 }), normalization: Batch(BatchNormConfig { num_features: 0, epsilon: 1e-5, momentum: 0.1 }), activation: Relu }
- "resnet152.tv_in1k": TorchVision ResNet-152
```

## Various Options to Rewrite Models

The runs below fine-tune `resnet50.tv_in1k` for 60 epochs and compare the default settings against `--cautious-weight-decay` and the `--replace-activation` overrides (LeakyRelu, Gelu); the parsed `Args` are printed after each run.

``` terminaloutput

$ cargo run --release -p resnet_finetune --features cuda -- --cautious-weight-decay
| Split | Metric                         | Min.     | Epoch    | Max.     | Epoch    |
|-------|--------------------------------|----------|----------|----------|----------|
| Train | CPU Memory                     | 16.340   | 1        | 19.275   | 59       |
| Train | CPU Usage                      | 1.752    | 26       | 3.208    | 59       |
| Train | Hamming Score @ Threshold(0.5) | 82.597   | 1        | 97.160   | 48       |
| Train | Learning Rate                  | 1.284e-8 | 58       | 4.999e-5 | 1        |
| Train | Loss                           | 0.147    | 50       | 0.642    | 1        |
| Valid | CPU Memory                     | 16.268   | 1        | 19.412   | 59       |
| Valid | CPU Usage                      | 1.682    | 7        | 3.643    | 58       |
| Valid | Hamming Score @ Threshold(0.5) | 84.843   | 1        | 95.235   | 38       |
| Valid | Loss                           | 0.146    | 52       | 0.624    | 1        |
Args {
    seed: 0,
    train_percentage: 70,
    artifact_dir: "/tmp/resnet_finetune",
    batch_size: 24,
    grads_accumulation: 8,
    smoothing: Some(
        0.1,
    ),
    num_workers: 4,
    num_epochs: 60,
    patience: 20,
    pretrained: "resnet50.tv_in1k",
    replace_activation: None,
    freeze_layers: false,
    drop_block_prob: 0.2,
    stochastic_depth_prob: 0.05,
    learning_rate: 5e-5,
    cautious_weight_decay: true,
    weight_decay: 0.02,
}

$ cargo run --release -p resnet_finetune --features cuda --
| Split | Metric                         | Min.     | Epoch    | Max.     | Epoch    |
|-------|--------------------------------|----------|----------|----------|----------|
| Train | CPU Memory                     | 16.192   | 1        | 18.592   | 59       |
| Train | CPU Usage                      | 1.704    | 57       | 2.672    | 47       |
| Train | Hamming Score @ Threshold(0.5) | 82.580   | 1        | 97.126   | 49       |
| Train | Learning Rate                  | 1.284e-8 | 58       | 4.999e-5 | 1        |
| Train | Loss                           | 0.147    | 50       | 0.643    | 1        |
| Valid | CPU Memory                     | 16.235   | 1        | 18.607   | 59       |
| Valid | CPU Usage                      | 1.704    | 1        | 3.640    | 41       |
| Valid | Hamming Score @ Threshold(0.5) | 84.902   | 1        | 95.196   | 53       |
| Valid | Loss                           | 0.148    | 55       | 0.621    | 1        |
Args {
    seed: 0,
    train_percentage: 70,
    artifact_dir: "/tmp/resnet_finetune",
    batch_size: 24,
    grads_accumulation: 8,
    smoothing: Some(
        0.1,
    ),
    num_workers: 4,
    num_epochs: 60,
    patience: 20,
    pretrained: "resnet50.tv_in1k",
    replace_activation: None,
    freeze_layers: false,
    drop_block_prob: 0.2,
    stochastic_depth_prob: 0.05,
    learning_rate: 5e-5,
    cautious_weight_decay: false,
    weight_decay: 0.02,
}

| Split | Metric                         | Min.     | Epoch    | Max.     | Epoch    |
|-------|--------------------------------|----------|----------|----------|----------|
| Train | CPU Memory                     | 16.486   | 1        | 18.592   | 60       |
| Train | CPU Usage                      | 1.492    | 31       | 2.761    | 3        |
| Train | Hamming Score @ Threshold(0.5) | 82.647   | 1        | 97.168   | 55       |
| Train | Learning Rate                  | 1.284e-8 | 58       | 4.999e-5 | 1        |
| Train | Loss                           | 0.146    | 50       | 0.642    | 1        |
| Valid | CPU Memory                     | 16.585   | 1        | 18.600   | 60       |
| Valid | CPU Usage                      | 1.452    | 32       | 5.302    | 3        |
| Valid | Hamming Score @ Threshold(0.5) | 84.941   | 1        | 95.294   | 39       |
| Valid | Loss                           | 0.147    | 55       | 0.620    | 1        |

Training completed in 12m53s
Args {
    seed: 0,
    train_percentage: 70,
    artifact_dir: "/tmp/resnet_finetune",
    batch_size: 24,
    grads_accumulation: 8,
    smoothing: Some(
        0.1,
    ),
    num_workers: 4,
    num_epochs: 60,
    patience: 20,
    pretrained: "resnet50.tv_in1k",
    replace_activation: Some(
        LeakyRelu,
    ),
    freeze_layers: false,
    drop_block_prob: 0.2,
    stochastic_depth_prob: 0.05,
    learning_rate: 5e-5,
    cautious_weight_decay: true,
    weight_decay: 0.02,
}

$ cargo run --release -p resnet_finetune --features cuda -- --replace-activation gelu --cautious-weight-decay
| Split | Metric                         | Min.     | Epoch    | Max.     | Epoch    |
|-------|--------------------------------|----------|----------|----------|----------|
| Train | CPU Memory                     | 16.276   | 1        | 18.669   | 60       |
| Train | CPU Usage                      | 1.491    | 46       | 2.209    | 2        |
| Train | Hamming Score @ Threshold(0.5) | 83.244   | 1        | 95.790   | 58       |
| Train | Learning Rate                  | 1.284e-8 | 58       | 4.999e-5 | 1        |
| Train | Loss                           | 0.171    | 50       | 0.654    | 1        |
| Valid | CPU Memory                     | 16.409   | 1        | 18.673   | 60       |
| Valid | CPU Usage                      | 1.494    | 1        | 2.902    | 15       |
| Valid | Hamming Score @ Threshold(0.5) | 83.373   | 1        | 94.706   | 57       |
| Valid | Loss                           | 0.159    | 55       | 0.637    | 1        |

Training completed in 13m12s
Args {
    seed: 0,
    train_percentage: 70,
    artifact_dir: "/tmp/resnet_finetune",
    batch_size: 24,
    grads_accumulation: 8,
    smoothing: Some(
        0.1,
    ),
    num_workers: 4,
    num_epochs: 60,
    patience: 20,
    pretrained: "resnet50.tv_in1k",
    replace_activation: Some(
        Gelu,
    ),
    freeze_layers: false,
    drop_block_prob: 0.2,
    stochastic_depth_prob: 0.05,
    learning_rate: 5e-5,
    cautious_weight_decay: true,
    weight_decay: 0.02,
}
```
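
Each run prints the parsed `Args` struct, so the CLI flags map one-to-one onto the fields in those dumps. Purely as an illustration, the sketch below shows how an argument struct of this shape could be declared with clap's derive API; the field names and defaults mirror the dumps above, but the types, the `Activation` enum, and the use of clap itself are assumptions for the sketch, not the example crate's actual code (it would need `clap = { version = "4", features = ["derive"] }`).

```rust
// Hypothetical sketch only: field names and defaults are taken from the Args
// dumps above; the real resnet_finetune example may define its CLI differently.
use clap::{Parser, ValueEnum};

/// Activation that can be swapped in for the pretrained network's ReLU.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum Activation {
    Relu,
    LeakyRelu, // parsed from `--replace-activation leaky-relu`
    Gelu,      // parsed from `--replace-activation gelu`
}

#[derive(Parser, Debug)]
struct Args {
    #[arg(long, default_value_t = 0)]
    seed: u64,
    #[arg(long, default_value_t = 70)]
    train_percentage: u8,
    #[arg(long, default_value = "/tmp/resnet_finetune")]
    artifact_dir: String,
    #[arg(long, default_value_t = 24)]
    batch_size: usize,
    #[arg(long, default_value_t = 8)]
    grads_accumulation: usize,
    /// Label smoothing factor (the dumps show Some(0.1) even without a flag,
    /// so the real crate presumably sets a default here).
    #[arg(long)]
    smoothing: Option<f64>,
    #[arg(long, default_value_t = 4)]
    num_workers: usize,
    #[arg(long, default_value_t = 60)]
    num_epochs: usize,
    #[arg(long, default_value_t = 20)]
    patience: usize,
    #[arg(long, default_value = "resnet50.tv_in1k")]
    pretrained: String,
    /// Optional activation override, e.g. `--replace-activation gelu`.
    #[arg(long, value_enum)]
    replace_activation: Option<Activation>,
    #[arg(long)]
    freeze_layers: bool,
    #[arg(long, default_value_t = 0.2)]
    drop_block_prob: f64,
    #[arg(long, default_value_t = 0.05)]
    stochastic_depth_prob: f64,
    #[arg(long, default_value_t = 5e-5)]
    learning_rate: f64,
    /// Boolean flag, off by default: `--cautious-weight-decay` turns it on.
    #[arg(long)]
    cautious_weight_decay: bool,
    #[arg(long, default_value_t = 0.02)]
    weight_decay: f64,
}

fn main() {
    let args = Args::parse();
    // Pretty-printed Debug output, the same shape as the dumps in the logs above.
    println!("{args:#?}");
}
```

Under that assumption, the second run above (no extra flags) is just the defaults, `--cautious-weight-decay` flips its boolean, and `--replace-activation gelu` shows up as `replace_activation: Some(Gelu)` in the printed dump.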