2 files changed: +31 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -156,12 +156,18 @@ for ((id=0; id<num_builds; ++id)); do
156156 test_id=$( jq -r " .tests[$ti ].id" " $CONFIG_FILE " )
157157 arg_str=" $( args_string_for_test " $ti " ) "
158158
159+ if [[ " $arg_str " == * " tensor_parallel" * ]]; then
160+ prefix=" NCCL_LAUNCH_MODE=GROUP "
161+ else
162+ prefix=" "
163+ fi
164+
159165 # gpt2
160- gpt2_cmd=" ./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${arg_str} "
166+ gpt2_cmd=" ${prefix} ./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${arg_str} "
161167 run_and_log " $gpt2_cmd " " gpt2_${test_id}${log_suffix} " " $profile_flag "
162168
163169 # llama3
164- llama3_cmd=" ./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${arg_str} "
170+ llama3_cmd=" ${prefix} ./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${arg_str} "
165171 run_and_log " $llama3_cmd " " llama3_${test_id}${log_suffix} " " $profile_flag "
166172 done
167173done
Original file line number | Diff line number | Diff line change
116116 "tensor_parallel" : 4 ,
117117 "sequence_parallel" : true
118118 }
119+ },
120+ {
121+ "id" : " 6" ,
122+ "args" : {
123+ "dtype" : " float32" ,
124+ "nthread_per_process" : 8 ,
125+ "num_iteration" : 10 ,
126+ "batch_size" : 10 ,
127+ "total_batch_size" : 5120 ,
128+ "pipeline_parallel" : 8
129+ }
130+ },
131+ {
132+ "id" : " 6_bfloat16" ,
133+ "args" : {
134+ "dtype" : " bfloat16" ,
135+ "nthread_per_process" : 8 ,
136+ "num_iteration" : 10 ,
137+ "batch_size" : 10 ,
138+ "total_batch_size" : 5120 ,
139+ "pipeline_parallel" : 8
140+ }
119141 }
120142 ]
121143}
144+
You can’t perform that action at this time.
0 commit comments