Commit 26aabcb

jhou-jpg authored and facebook-github-bot committed
Entropy warmup enhancements (#651)
Summary: This diff fixes an implementation error that was affecting the slope and r^2 computations. It also optimizes the entropy warmup logic to improve its behavior with smaller/faster kernels. Further numerical-stability improvements were brought in from the cross-PR on nvbench, NVIDIA/nvbench#286. Since entropy tracks the frequency of appearance of each unique timing value, the rounding precision applied to individual latency measurements can affect how the entropy converges. This diff introduces logic that dynamically increases the rounding precision to maintain a balance between entropy sensitivity and trend detection.

Reviewed By: xuzhao9

Differential Revision: D87379814
1 parent aa5ecdc commit 26aabcb
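
For context on the diff below: the criterion's entropy is the Shannon entropy of the frequency table of rounded timings. With counts c_i summing to n and p_i = c_i / n, H = -sum(p_i * log2(p_i)) = log2(n) - (1/n) * sum(c_i * log2(c_i)), so the code only needs to maintain the single running sum S = sum(c_i * log2(c_i)) (the `_sum_count_log_count` in the first file). A toy check of that identity, separate from the tritonbench classes:

import math
from collections import Counter

samples = [1.001, 1.001, 1.002, 1.003, 1.003, 1.003]
counts = Counter(samples)
n = len(samples)

# Direct Shannon entropy over the frequency table
h_direct = -sum((c / n) * math.log2(c / n) for c in counts.values())

# Running-sum form used by the criterion: H = log2(n) - S/n
s = sum(c * math.log2(c) for c in counts.values())
h_running = math.log2(n) - s / n

assert abs(h_direct - h_running) < 1e-12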

File tree

2 files changed: +52 −27


tritonbench/components/do_bench/entropy/entropy_criterion.py

Lines changed: 19 additions & 18 deletions
@@ -99,12 +99,17 @@ def _update_entropy_sum(self, old_count: int, new_count: int) -> None:
             new_count: New count (0 if removing unique value)
         """
         # Remove old contribution: S -= old_count * log2(old_count)
-        if old_count > 0:
-            self._sum_count_log_count -= old_count * math.log2(old_count)
-
-        # Add new contribution: S += new_count * log2(new_count)
-        if new_count > 0:
-            self._sum_count_log_count += new_count * math.log2(new_count)
+        # Optimization: n*log(n) - o*log(o) = n*log(1 + (n - o)/o) + (n - o)*log(o)
+        if old_count > 0 and new_count > 0:
+            delta = new_count - old_count
+            self._sum_count_log_count += (
+                new_count * math.log2(1 + delta / old_count) + delta * math.log2(old_count)
+            )
+        else:
+            if old_count > 0:
+                self._sum_count_log_count -= old_count * math.log2(old_count)
+            if new_count > 0:
+                self._sum_count_log_count += new_count * math.log2(new_count)
 
     def _compute_entropy(self) -> float:
         """
@@ -121,7 +126,7 @@ def _compute_entropy(self) -> float:
 
         # Entropy formula: H = log2(n) - S/n
         entropy = math.log2(n) - (self._sum_count_log_count / n)
-        return entropy
+        return max(0.0, entropy)
 
     def add_measurement(self, measurement: float) -> None:
         """
@@ -157,24 +162,20 @@ def add_measurement(self, measurement: float) -> None:
 
         # Update running statistics for linear regression
         # If entropy_tracker is full, remove oldest component from running stats
+        # removal index in the sliding window = 0
         if len(self.entropy_tracker) == self.window_size:
             old_entropy = self.entropy_tracker[0]
-            old_x = 0  # Oldest position in the sliding window
-            old_sum_x = self._sum_x
 
             # Remove old values from running sums
-            self._sum_x -= old_x
             self._sum_y -= old_entropy
-            self._sum_xy -= old_x * old_entropy
-            self._sum_x2 -= old_x * old_x
             self._sum_y2 -= old_entropy * old_entropy
-            self._n -= 1
 
             # Remove element's effect from sum of squares
-            n = self._n
-            self._sum_x2 -= 2 * old_sum_x + n  # Use saved old_sum_x
+            n = self._n - 1
             self._sum_x -= n
+            self._sum_x2 -= 2 * self._sum_x + n
             self._sum_xy -= self._sum_y
+            self._n -= 1
 
         # Add new entropy value to running stats
         x = self._n
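
The bookkeeping above works because evicting the oldest point (at x = 0) shifts every remaining x down by one; with n points remaining, sum(x - 1) = sum(x) - n, sum((x - 1)^2) = sum(x^2) - 2*sum(x) + n when applied after sum_x is updated (exactly the order the diff uses), and sum((x - 1)*y) = sum(x*y) - sum(y). The removed lines appear to have subtracted 2*old_sum_x + n with the pre-shift sum of x, over-subtracting from sum_x2 on every eviction — which matches the slope/r^2 error the summary describes. A standalone check of the corrected update, outside the class:

from collections import deque

window = deque([0.9, 1.1, 1.0, 1.2], maxlen=4)
n = len(window) - 1                      # points that remain after eviction
old_y = window[0]

sum_x = sum(range(len(window)))
sum_y = sum(window)
sum_x2 = sum(x * x for x in range(len(window)))
sum_xy = sum(x * y for x, y in enumerate(window))
sum_y2 = sum(y * y for y in window)

# Incremental eviction, mirroring the diff's order of operations
sum_y -= old_y
sum_y2 -= old_y * old_y
sum_x -= n
sum_x2 -= 2 * sum_x + n                  # uses the already-updated sum_x
sum_xy -= sum_y

# Recompute from scratch over the shrunk window to compare
window.popleft()
assert sum_x == sum(range(len(window)))
assert sum_x2 == sum(x * x for x in range(len(window)))
assert abs(sum_xy - sum(x * y for x, y in enumerate(window))) < 1e-12
assert abs(sum_y2 - sum(y * y for y in window)) < 1e-12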
@@ -217,7 +218,7 @@ def is_finished(self) -> bool:
         numerator = self._sum_xy - n * mean_x * mean_y
         denominator = self._sum_x2 - n * mean_x * mean_x
 
-        if denominator == 0:
+        if (denominator < 1e-9):
             return False
 
         slope = numerator / denominator
@@ -240,8 +241,8 @@ def is_finished(self) -> bool:
             + n * intercept * intercept
         )
 
-        # If ss_tot == 0, entropy values are identical => perfect stability
-        if ss_tot == 0:
+        # If ss_tot < epsilon, entropy values are identical => perfect stability
+        if (ss_tot < 1e-9):
             r2 = 1.0
         else:
             r2 = max(0.0, min(1.0, 1 - (ss_res / ss_tot)))
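
These two hunks replace exact zero tests with epsilon guards on the regression the criterion runs over the entropy window: fit entropy against sample index, then require a near-flat slope and a high r^2 before declaring warmup done. A minimal sketch of that check (illustrative names, not the tritonbench API; the degrees-from-slope conversion is an assumption, not lifted from this diff):

import math

def regression_check(xs, ys, max_angle=1.0, min_r2=0.9):
    n = len(xs)
    mean_x, mean_y = sum(xs) / n, sum(ys) / n
    numerator = sum(x * y for x, y in zip(xs, ys)) - n * mean_x * mean_y
    denominator = sum(x * x for x in xs) - n * mean_x * mean_x
    if denominator < 1e-9:          # epsilon guard, as in the diff
        return False
    slope = numerator / denominator
    intercept = mean_y - slope * mean_x
    ss_res = sum((y - (slope * x + intercept)) ** 2 for x, y in zip(xs, ys))
    ss_tot = sum((y - mean_y) ** 2 for y in ys)
    # ss_tot ~ 0 means the entropy trace is flat => treat as a perfect fit
    r2 = 1.0 if ss_tot < 1e-9 else max(0.0, min(1.0, 1 - ss_res / ss_tot))
    angle = abs(math.degrees(math.atan(slope)))  # assumed angle convention
    return angle <= max_angle and r2 >= min_r2

# Flat, noiseless entropy trace: slope ~ 0 and ss_tot ~ 0 force r2 = 1.0
print(regression_check(list(range(10)), [2.5] * 10))  # True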

tritonbench/components/do_bench/run.py

Lines changed: 33 additions & 9 deletions
@@ -502,22 +502,25 @@ def _do_bench_entropy(
     assert return_mode in ["min", "max", "mean", "median", "all"]
 
     # ENTROPY-BASED WARMUP
-    criterion = EntropyCriterion(
+    entropy_criterion = EntropyCriterion(
         max_angle=max_angle,
         min_r2=min_r2,
         window_size=window_size,
         min_warmup_samples=min_warmup_samples,
     )
-    criterion.reset()
-    BATCH_SIZE = 20
+    entropy_criterion.reset()
+
+    rounding_factor = 3
+    BATCH_SIZE = 50
     last_batch = [-1.00] * BATCH_SIZE
     counter = 0
     converged = False
+    precision_increase = False
 
     cache = triton.runtime.driver.active.get_empty_cache_for_benchmark()
 
     # Adaptive warmup loop with batched synchronization
-    while not criterion.is_finished():
+    while True:
         remaining = max_samples - counter
         batch_size = min(BATCH_SIZE, remaining) if remaining > 0 else BATCH_SIZE

@@ -540,20 +543,41 @@ def _do_bench_entropy(
         torch.cuda.synchronize()
 
         for i in range(batch_size):
-            v = round(batch_start_events[i].elapsed_time(batch_end_events[i]), 3)
-            criterion.add_measurement(v)
+            v = round(batch_start_events[i].elapsed_time(batch_end_events[i]), rounding_factor)
             last_batch[i] = v
+
+            entropy_criterion.add_measurement(v)
+
+            if entropy_criterion.is_finished():
+                converged = True
+                break
+
         counter += batch_size
 
+        if converged:
+            break
+
         if counter >= max_samples:
             break
-        else:
-            converged = True
+
+        if counter >= 200 and not precision_increase:
+            stats = entropy_criterion.get_stats()
+            unique_count = stats.get('unique_measurements', 0)
+
+            # If we have < 20 unique values, this indicates quantization; increase rounding precision
+            if unique_count < 20:
+                rounding_factor = 4
+                entropy_criterion.reset()
+                entropy_criterion.entropy_window_size = 1000
+
+                logger.info(f"Quantization detected: only {unique_count} unique measurements.")
+                precision_increase = True
+
 
     # Log if warmup didn't converge
     if not converged:
         logger.warning(
-            f"Entropy warmup did not converge after {counter} samples "
+            f"Warmup did not converge after {counter} samples "
            f"(max_samples={max_samples})"
         )
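
The new block is the dynamic-precision logic from the summary: after 200 samples, if fewer than 20 unique rounded values have been seen, the timings are being quantized away by 3-decimal rounding, so the criterion is reset with rounding_factor = 4. A toy illustration with synthetic timings (the numbers are made up, not benchmark output):

import random

random.seed(0)
# A ~5 microsecond kernel, expressed in milliseconds with some jitter
timings_ms = [0.005 + random.gauss(0, 0.0004) for _ in range(200)]

unique_3 = len({round(t, 3) for t in timings_ms})
unique_4 = len({round(t, 4) for t in timings_ms})
print(unique_3, unique_4)  # e.g. a handful of buckets vs. well over 20

At 3 decimals nearly all samples collapse into a few buckets, so the entropy signal saturates and the trend fit has almost nothing to work with; one extra decimal restores enough resolution for convergence detection without making every sample unique.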
