Trained weights of 83M_1x8_384: [here]().
Pretrain:

``` shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --standalone --nproc_per_node=8 \
    main_pretrain.py \
    --batch_size 256 \
    --blr 1.5e-4 \
    --warmup_epochs 20 \
    --epochs 200 \
    --model spikmae_12_512 \
    --mask_ratio 0.50 \
    --data_path ../imagenet1-k
```
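Note that `--blr` is a *base* learning rate, not the absolute one: in MAE-style training scripts the absolute rate is scaled by the effective batch size (per-GPU batch × GPUs × gradient-accumulation steps). This scaling rule is an assumption about this repo's script, following the MAE reference convention; a minimal sketch:

``` python
def absolute_lr(blr: float, batch_size: int, num_gpus: int, accum_iter: int = 1) -> float:
    """Scale the base LR by the effective batch size (MAE convention, assumed here)."""
    eff_batch = batch_size * num_gpus * accum_iter
    return blr * eff_batch / 256

# For the pretrain command above: 256 per GPU x 8 GPUs = 2048 effective.
print(absolute_lr(1.5e-4, 256, 8))  # 1.5e-4 * 2048 / 256 = 0.0012
```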

Finetune:

``` shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --standalone --nproc_per_node=8 \
    main_finetune.py \
    --batch_size 128 \
    --blr 6e-4 \
    --warmup_epochs 10 \
    --layer_decay 0.75 \
    --finetune ../pretrain_checkpoint.pth \
    --epochs 150 \
    --drop_path 0.1 \
    --model spikformer_12_768 \
    --data_path ../imagenet1-k \
    --output_dir ../outputs/test \
    --log_dir ../outputs/test \
    --reprob 0.25 \
    --mixup 0.8 \
    --cutmix 1.0 \
    --dist_eval
```
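`--layer_decay 0.75` applies layer-wise learning-rate decay: earlier transformer blocks receive exponentially smaller learning rates than later ones. A sketch of the per-layer multiplier, assuming the BEiT/MAE convention (`scale = decay ** (num_layers + 1 - layer_id)`, with the patch embedding at id 0 and the head at id `num_layers + 1`) — check the script's `param_groups_lrd` equivalent for the exact rule:

``` python
def layer_lr_scales(num_layers: int, decay: float) -> list[float]:
    """Per-layer LR multipliers: deeper layers keep more of the base LR."""
    # id 0 = patch embedding, ids 1..num_layers = blocks, id num_layers + 1 = head.
    return [decay ** (num_layers + 1 - i) for i in range(num_layers + 2)]

scales = layer_lr_scales(12, 0.75)
# Embedding gets ~0.0238x the base LR, the last block 0.75x, the head 1.0x.
print(f"embedding: {scales[0]:.4f}, last block: {scales[-2]:.4f}, head: {scales[-1]:.4f}")
```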
Distillation:

``` shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --standalone --nproc_per_node=8 \
    main_finetune.py \
    --batch_size 196 \
    --blr 1e-3 \
    --warmup_epochs 5 \
    --epochs 100 \
    --drop_path 0.1 \
    --finetune finetune_checkpoint.pth \
    --model spikformer12_512 \
    --data_path ../imagenet1-k \
    --output_dir ./outputs/.. \
    --log_dir ./outputs/.. \
    --dist_eval \
    --time_steps 1 \
    --kd \
    --input_size 224 \
    --teacher_model caformer_b36_in21ft1k \
    --reprob 0.25 \
    --mixup 0.5 \
    --cutmix 1.0 \
    --distillation_type hard
```
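`--distillation_type hard` suggests DeiT-style hard distillation: the student is also trained on the teacher's argmax prediction as if it were a ground-truth label, rather than on the teacher's soft probabilities. A stdlib-only sketch of the combined objective (the 50/50 weighting and the exact mixing rule are assumptions; check the script's distillation loss for the actual split):

``` python
import math

def cross_entropy(logits, target):
    """CE of one example's logits against an integer class label."""
    m = max(logits)
    log_z = m + math.log(sum(math.exp(x - m) for x in logits))
    return log_z - logits[target]

def hard_distill_loss(student_logits, teacher_logits, label, alpha=0.5):
    """DeiT-style hard distillation: mix the true label with the teacher's argmax."""
    teacher_label = max(range(len(teacher_logits)), key=teacher_logits.__getitem__)
    return (1 - alpha) * cross_entropy(student_logits, label) \
        + alpha * cross_entropy(student_logits, teacher_label)

loss = hard_distill_loss([2.0, 0.5, -1.0], [0.1, 3.0, 0.2], label=0)
```

When the teacher agrees with the ground truth, the two terms coincide and the loss reduces to plain cross-entropy; the distillation term only pulls the student elsewhere where teacher and label disagree.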

### Data Preparation