test: add pipeline-parallel test configs; set NCCL_LAUNCH_MODE=GROUP when args include tensor_parallel

JYMiracle305 · JYMiracle305 · commit b85130698933 · 2025-11-17T22:50:04.000+08:00
diff --git a/scripts/run_models_and_profile.bash b/scripts/run_models_and_profile.bash
@@ -156,12 +156,18 @@ for ((id=0; id<num_builds; ++id)); do
         test_id=$(jq -r ".tests[$ti].id" "$CONFIG_FILE")
         arg_str="$(args_string_for_test "$ti")"
 
+        if [[ "$arg_str" == *"tensor_parallel"* ]]; then
+            prefix="NCCL_LAUNCH_MODE=GROUP "
+        else
+            prefix=""
+        fi
+
         # gpt2
-        gpt2_cmd="./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${arg_str}"
+        gpt2_cmd="${prefix}./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${arg_str}"
         run_and_log "$gpt2_cmd" "gpt2_${test_id}${log_suffix}" "$profile_flag"
 
         # llama3
-        llama3_cmd="./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${arg_str}"
+        llama3_cmd="${prefix}./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${arg_str}"
         run_and_log "$llama3_cmd" "llama3_${test_id}${log_suffix}" "$profile_flag"
     done
 done
diff --git a/scripts/test_config.json b/scripts/test_config.json
@@ -116,6 +116,29 @@
                 "tensor_parallel": 4,
                 "sequence_parallel": true
             }
+        },
+        {
+            "id": "6",
+            "args": {
+                "dtype": "float32",
+                "nthread_per_process": 8,
+                "num_iteration": 10,
+                "batch_size": 10,
+                "total_batch_size": 5120,
+                "pipeline_parallel": 8
+            }
+        },
+        {
+            "id": "6_bfloat16",
+            "args": {
+                "dtype": "bfloat16",
+                "nthread_per_process": 8,
+                "num_iteration": 10,
+                "batch_size": 10,
+                "total_batch_size": 5120,
+                "pipeline_parallel": 8
+            }
         }
     ]
 }
+