|
10 | 10 | @pytest.mark.parametrize("world_size", [1, 2]) |
11 | 11 | @pytest.mark.parametrize("mode", ["graph", "transformers"]) |
12 | 12 | @pytest.mark.parametrize( |
13 | | - "experiment_config", |
| 13 | + "experiment_config, attn_backend, compile_backend", |
14 | 14 | [ |
15 | 15 | get_small_model_config_pytest_param( |
16 | 16 | "meta-llama/Meta-Llama-3.1-8B-Instruct", |
|
19 | 19 | ), |
20 | 20 | ], |
21 | 21 | ) |
def test_build_ad(
    world_size: int, experiment_config: Dict, attn_backend: str, compile_backend: str, mode: str
):
    """Build a small model end-to-end through the AutoDeploy pipeline.

    Overrides the parametrized experiment config with the given world size,
    runtime, mode, and a mode-specific transform pipeline (wired to the
    selected attention and compile backends), then runs ``main()`` on it.
    """
    import copy

    # Parametrize reuses the same dict object across the whole parameter
    # cross-product; deep-copy before mutating so one combination cannot
    # leak overrides (world_size, transforms, ...) into the next test.
    experiment_config = copy.deepcopy(experiment_config)
    experiment_config["args"]["world_size"] = world_size
    experiment_config["args"]["runtime"] = "trtllm"  # Default runtime set to trtllm
    experiment_config["args"]["mode"] = mode
    # The transform pipeline differs by export mode: full graph capture
    # ("graph") vs. the transformers-based cached-attention replacement path.
    experiment_config["args"]["transforms"] = (
        {
            "resize_kv_cache": {
                "stage": "cache_init",
                # 0.0 keeps the KV cache at its minimal size for fast CI runs.
                "free_mem_ratio": 0.00,
            },
            "match_attention_layout": {
                "stage": "pattern_matcher",
                "attn_backend": attn_backend,
            },
            "insert_cached_attention": {
                "stage": "cache_init",
                "attn_backend": attn_backend,
            },
            "compile_model": {
                "stage": "compile",
                "compile_backend": compile_backend,
            },
        }
        if mode == "graph"
        else {
            "transformers_replace_cached_attn": {
                "stage": "cache_init",
                "attn_backend": attn_backend,
            },
        }
    )
    experiment_config = ExperimentConfig(**experiment_config)
    print(f"Experiment Config: {experiment_config}")
    main(experiment_config)
0 commit comments