Switch back to triton backend for fuse_rmsnorm

nvchenghaoz · nvchenghaoz · commit 7e3e37938d8f · 2025-12-05T15:54:30.000-08:00
Signed-off-by: Chenghao Zhang <211069071+nvchenghaoz@users.noreply.github.com>
diff --git a/tests/integration/defs/examples/test_ad_speculative_decoding.py b/tests/integration/defs/examples/test_ad_speculative_decoding.py
@@ -81,6 +81,9 @@ def run_with_autodeploy(model, speculative_model_dir, batch_size):
         "world_size": 1,
         "kv_cache_config": kv_cache_config,
         "disable_overlap_scheduler": True,
+        "transforms": {
+            "fuse_rmsnorm": {"rmsnorm_backend": "triton"},
+        },
         "max_num_tokens": 64,
     }
 

Original file line number	Diff line number	Diff line change
`@@ -81,6 +81,9 @@ def run_with_autodeploy(model, speculative_model_dir, batch_size):`
`81`	`81`	`"world_size": 1,`
`82`	`82`	`"kv_cache_config": kv_cache_config,`
`83`	`83`	`"disable_overlap_scheduler": True,`
	`84`	`+ "transforms": {`
	`85`	`+ "fuse_rmsnorm": {"rmsnorm_backend": "triton"},`
	`86`	`+ },`
`84`	`87`	`"max_num_tokens": 64,`
`85`	`88`	`}`
`86`	`89`