Skip to content

Commit 1cf15cc

Browse files
committed
set gpu_per_node
1 parent 68abd58 commit 1cf15cc

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

autotest/module/train.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ def get_cmd(config):
9 9
print(config)
10 10
config_path = config.get("parameters").get("config")
11 11
train_type = config.get("type")
12 +
nproc_per_node = config.get("resource", {}).get("gpus_per_task", 8)
12 13
if train_type in ["sft", "rl"]:
13 14
model_config = config.get("parameters", {}).get("model", None)
14 15
config_path = config.get("parameters", {}).get("config", None)
@@ -26,7 +27,7 @@ def get_cmd(config):
26 27

27 28
command = (
28 29
f"cd {current_dir}; pwd; pip install -e .[all]; pip install more-itertools; export GITHUB_RUN_ID={config.get('run_id')}; "
29 -
+ "torchrun --nproc-per-node 8 --master_addr=${MASTER_ADDR} --master_port=${MASTER_PORT} --nnodes=${WORLD_SIZE} --node_rank=${RANK} "
30 +
+ f"torchrun --nproc-per-node {nproc_per_node} --master_addr=${{MASTER_ADDR}} --master_port=${{MASTER_PORT}} --nnodes=${{WORLD_SIZE}} --node_rank=${{RANK}} "
30 31
+ f"xtuner/v1/train/cli/{train_type}.py"
31 32
)
32 33
if config_path:

0 commit comments

Comments
 (0)