From 8aa6cc41e9637649b16d29b2a0f77ce44e839e5b Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <lwilkins@redhat.com>
Date: Wed, 1 Oct 2025 07:26:16 -0700
Subject: [PATCH] revert max split heuristics

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
---
 csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
index 297d94dcc063..77e1c9351f97 100644
--- a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
+++ b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
@@ -134,13 +134,6 @@ class MLA {
     int max_splits = ceil_div(K, 128);
     max_splits = min(16, max_splits);
 
-    // TODO: This avoids a hang when the batch size larger than 1 and 
-    // there is more than 1 kv_splits. 
-    // Discuss with NVIDIA how this can be fixed.
-    if (B > 1) {
-      max_splits = min(1, max_splits);
-    }
-    
     // printf("    max_splits = %d\n", max_splits);
     int sms_per_batch = max(1, sm_count / B);
     // printf("    sms_per_batch = %d\n", sms_per_batch);