File tree: 2 files changed, +3 -4 lines changed
Original file line number | Diff line number | Diff line change
5050 tokenizer_path = GPTOSS_21B_PATH ,
5151 global_batch_size = 16 ,
5252 total_epoch = 1 ,
53- work_dir = f"/mnt/shared-storage-user/llmrazor-share/qa-llm-cicd/test_output/ { os .environ ['GITHUB_RUN_ID ' ]} /gptoss-sft/sft " ,
53+ work_dir = f"{ os .environ ['WORK_DIR ' ]} " ,
5454 seed = 0 ,
5555)
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,11 @@ def get_cmd(config):
2424 train_type ,
2525 ]
2626 )
27+ config ["work_dir" ] = work_dir
2728
2829 if train_type == "sft" :
2930 command = (
30- f"cd { current_dir } ; pwd; pip install -e .[all]; pip install more-itertools; export GITHUB_RUN_ID={ config .get ('run_id' )} ; "
31+ f"cd { current_dir } ; pwd; pip install -e .[all]; pip install more-itertools; export GITHUB_RUN_ID={ config .get ('run_id' )} ; export WORK_DIR= { work_dir } ; "
3132 + f"torchrun --nproc-per-node { nproc_per_node } --master_addr=${{MASTER_ADDR}} --master_port=${{MASTER_PORT}} --nnodes=${{WORLD_SIZE}} --node_rank=${{RANK}} "
3233 + f"xtuner/v1/train/cli/{ train_type } .py"
3334 )
@@ -46,11 +47,9 @@ def get_cmd(config):
4647 command += f" --dataset { dataset_path } "
4748 command += f" --work_dir { work_dir } "
4849
49- config ["work_dir" ] = work_dir
5050 return command , config
5151 elif train_type == "rl" :
5252 infer_type = config .get ("parameters" , {}).get ("infer_backend" , "lmdeploy" )
53- config ["work_dir" ] = work_dir
5453 command = (
5554 f"cd { current_dir } ; pwd; pip install -e .[all]; export GITHUB_RUN_ID={ config .get ('run_id' )} ; export WORK_DIR={ work_dir } ; "
5655 + f"bash -x examples/v1/scripts/run_rl.sh { config_path } { infer_type } ${{MODEL_PATH}} ${{DATA_PATH}} ${{EVAL_DATA_PATH}}"
You can’t perform that action at this time.
0 commit comments