Address review feedback: fall back to a conservative default max size when world size is not in _FI_MAX_SIZES

xinli-git · xinli-git · commit bf247dae8fb0 · 2025-07-21T14:28:27.000-07:00
diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
@@ -159,6 +159,9 @@ def __call__(self, graph: fx.Graph):
         6: MiB // 2,  # 512KB
         8: MiB // 2,  # 512KB
     }
+    # Conservative fallback cap (512KB) used when the world size
+    # has no entry in _FI_MAX_SIZES.
+    _DEFAULT_FI_MAX_SIZE = MiB // 2
 
     def call_trtllm_fused_allreduce_norm(
         allreduce_in: torch.Tensor,
@@ -178,8 +181,10 @@ def call_trtllm_fused_allreduce_norm(
         element_size = allreduce_in.element_size()
         current_tensor_size = num_tokens * hidden_size * element_size
         max_fusion_size = max_token_num * hidden_size * element_size
-        use_flashinfer = current_tensor_size <= min(_FI_MAX_SIZES[world_size],
-                                                    max_fusion_size)
+        use_flashinfer = current_tensor_size <= min(
+            _FI_MAX_SIZES.get(world_size, _DEFAULT_FI_MAX_SIZE),
+            max_fusion_size,
+        )
 
         if use_flashinfer:
             assert (_FI_WORKSPACE_TENSOR is not None