diff --git a/examples/llm_sparsity/launch_finetune.sh b/examples/llm_sparsity/launch_finetune.sh
index 5435fb4b5..cbfa91b86 100755
--- a/examples/llm_sparsity/launch_finetune.sh
+++ b/examples/llm_sparsity/launch_finetune.sh
@@ -91,8 +91,8 @@ CMD="accelerate launch --multi_gpu --mixed_precision bf16 finetune.py \
     --warmup_ratio 0.0 \
     --lr_scheduler_type cosine \
     --logging_steps 1 \
-    --fsdp 'full_shard auto_wrap' \
-    --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
+    --fsdp full_shard auto_wrap \
+    --fsdp_transformer_layer_cls_to_wrap LlamaDecoderLayer \
     --tf32 True \
     --modelopt_restore_path $MODELOPT_RESTORE_PATH \
     --report_to tensorboard \
diff --git a/examples/llm_sparsity/requirements.txt b/examples/llm_sparsity/requirements.txt
index e4d43ea0e..b510769e0 100644
--- a/examples/llm_sparsity/requirements.txt
+++ b/examples/llm_sparsity/requirements.txt
@@ -1,3 +1,4 @@
 flash-attn
 sentencepiece>=0.2.0
 tensorboardX
+transformers>=4.57.0
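
For context on the launch_finetune.sh change, here is a minimal sketch of the shell quoting pitfall it works around (hypothetical show_args helper, not part of the repo), assuming the script expands $CMD without eval. Word splitting of an unquoted variable does not perform quote removal, so quote characters embedded in the string reach the program as literal data and finetune.py's argument parser sees 'full_shard and auto_wrap' instead of the two FSDP options:

show_args() { for a in "$@"; do printf '[%s]\n' "$a"; done; }

CMD="show_args --fsdp 'full_shard auto_wrap'"
$CMD
# [--fsdp]
# ['full_shard]
# [auto_wrap']   <- the quote characters are passed through literally

With the quotes dropped, --fsdp receives full_shard and auto_wrap as separate clean tokens, which the multi-value --fsdp parsing in recent transformers releases (hence the transformers>=4.57.0 pin) accepts.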