37 changes: 19 additions & 18 deletions tritonbench/components/do_bench/entropy/entropy_criterion.py
@@ -99,12 +99,17 @@ def _update_entropy_sum(self, old_count: int, new_count: int) -> None:
new_count: New count (0 if removing unique value)
"""
# Remove old contribution: S -= old_count * log2(old_count)
if old_count > 0:
self._sum_count_log_count -= old_count * math.log2(old_count)

# Add new contribution: S += new_count * log2(new_count)
if new_count > 0:
self._sum_count_log_count += new_count * math.log2(new_count)
# Optimization: n*log(n) - o*log(o) = n*log(1 + (n-o)/o) + (n-o)*log(o)
if old_count > 0 and new_count > 0:
delta = new_count - old_count
self._sum_count_log_count += (
new_count * math.log2(1 + delta / old_count) + delta * math.log2(old_count)
)
else:
if old_count > 0:
self._sum_count_log_count -= old_count * math.log2(old_count)
if new_count > 0:
self._sum_count_log_count += new_count * math.log2(new_count)
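The incremental form above avoids recomputing both logarithms from scratch when a count changes by a small delta. A minimal sketch checking the identity numerically, with a few assumed (old, new) count pairs:

```python
import math

# Check: n*log2(n) - o*log2(o) == n*log2(1 + (n-o)/o) + (n-o)*log2(o)
# for positive counts o and n (toy pairs assumed for illustration).
for o, n in [(1, 2), (5, 4), (100, 101), (7, 7)]:
    direct = n * math.log2(n) - o * math.log2(o)
    delta = n - o
    incremental = n * math.log2(1 + delta / o) + delta * math.log2(o)
    assert math.isclose(direct, incremental)
```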

def _compute_entropy(self) -> float:
"""
@@ -121,7 +126,7 @@ def _compute_entropy(self) -> float:

# Entropy formula: H = log2(n) - S/n
entropy = math.log2(n) - (self._sum_count_log_count / n)
return entropy
return max(0.0, entropy)
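For context, the closed form used here follows from the definition of Shannon entropy over counts c_i with n total samples: H = -sum((c_i/n) * log2(c_i/n)) = log2(n) - (sum(c_i * log2(c_i)))/n. A small sketch with assumed toy data, comparing the shortcut to the textbook definition:

```python
import math
from collections import Counter

# Toy measurements (assumed); counts play the role of the tracked histogram.
data = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0]
counts = Counter(data).values()
n = len(data)

s = sum(c * math.log2(c) for c in counts)  # S = sum(count * log2(count))
shortcut = math.log2(n) - s / n            # H = log2(n) - S/n
textbook = -sum((c / n) * math.log2(c / n) for c in counts)
assert math.isclose(shortcut, textbook)
```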

def add_measurement(self, measurement: float) -> None:
"""
@@ -157,24 +162,20 @@ def add_measurement(self, measurement: float) -> None:

# Update running statistics for linear regression
# If entropy_tracker is full, remove oldest component from running stats
# The element being removed sits at index 0 of the sliding window
if len(self.entropy_tracker) == self.window_size:
old_entropy = self.entropy_tracker[0]
old_x = 0 # Oldest position in the sliding window
old_sum_x = self._sum_x

# Remove old values from running sums
self._sum_x -= old_x
self._sum_y -= old_entropy
self._sum_xy -= old_x * old_entropy
self._sum_x2 -= old_x * old_x
self._sum_y2 -= old_entropy * old_entropy
self._n -= 1

# Remove element's effect from sum of squares
n = self._n
self._sum_x2 -= 2 * old_sum_x + n # Use saved old_sum_x
n = self._n - 1
self._sum_x -= n
self._sum_x2 -= 2 * self._sum_x + n
self._sum_xy -= self._sum_y
self._n -= 1

# Add new entropy value to running stats
x = self._n
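The replacement block above folds the removal of the oldest point and the x-shift of the remaining window into O(1) updates of the running sums; note that `self._sum_xy -= self._sum_y` and `self._sum_x2 -= 2 * self._sum_x + n` deliberately use the already-updated sums. A sketch checking those updates against direct recomputation, with assumed toy values:

```python
# Window points sit at x = 0..n-1; drop the oldest (x = 0), shift x by -1.
ys = [3.0, 1.0, 4.0, 1.0, 5.0]  # assumed entropy values
n = len(ys)
sum_x = sum(range(n))
sum_y = sum(ys)
sum_xy = sum(i * y for i, y in enumerate(ys))
sum_x2 = sum(i * i for i in range(n))

sum_y -= ys[0]
sum_xy -= sum_y          # uses the already-updated sum_y
m = n - 1
sum_x -= m
sum_x2 -= 2 * sum_x + m  # uses the already-updated sum_x

rest = ys[1:]
assert sum_x == sum(range(len(rest)))
assert sum_xy == sum(i * y for i, y in enumerate(rest))
assert sum_x2 == sum(i * i for i in range(len(rest)))
```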
@@ -217,7 +218,7 @@ def is_finished(self) -> bool:
numerator = self._sum_xy - n * mean_x * mean_y
denominator = self._sum_x2 - n * mean_x * mean_x

if denominator == 0:
if denominator < 1e-9:
return False

slope = numerator / denominator
@@ -240,8 +241,8 @@
+ n * intercept * intercept
)

# If ss_tot == 0, entropy values are identical => perfect stability
if ss_tot == 0:
# If ss_tot < epsilon, entropy values are identical => perfect stability
if ss_tot < 1e-9:
r2 = 1.0
else:
r2 = max(0.0, min(1.0, 1 - (ss_res / ss_tot)))
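Taken together, the running sums give the least-squares slope, intercept, and R² without storing the window. A sketch with assumed toy data, cross-checked against directly computed residuals:

```python
xs = [0, 1, 2, 3, 4]              # window positions
ys = [0.9, 1.1, 0.98, 1.02, 1.0]  # assumed entropy values
n = len(xs)
sum_x, sum_y = sum(xs), sum(ys)
sum_xy = sum(x * y for x, y in zip(xs, ys))
sum_x2 = sum(x * x for x in xs)
sum_y2 = sum(y * y for y in ys)

mean_x, mean_y = sum_x / n, sum_y / n
slope = (sum_xy - n * mean_x * mean_y) / (sum_x2 - n * mean_x * mean_x)
intercept = mean_y - slope * mean_x

# Expanded ss_res, as in the code above
ss_res = (
    sum_y2
    - 2 * slope * sum_xy
    - 2 * intercept * sum_y
    + slope * slope * sum_x2
    + 2 * slope * intercept * sum_x
    + n * intercept * intercept
)
ss_tot = sum_y2 - n * mean_y * mean_y
r2 = max(0.0, min(1.0, 1 - ss_res / ss_tot))

direct = sum((y - (slope * x + intercept)) ** 2 for x, y in zip(xs, ys))
assert abs(ss_res - direct) < 1e-9
```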
42 changes: 33 additions & 9 deletions tritonbench/components/do_bench/run.py
@@ -502,22 +502,25 @@ def _do_bench_entropy(
assert return_mode in ["min", "max", "mean", "median", "all"]

# ENTROPY-BASED WARMUP
criterion = EntropyCriterion(
entropy_criterion = EntropyCriterion(
max_angle=max_angle,
min_r2=min_r2,
window_size=window_size,
min_warmup_samples=min_warmup_samples,
)
criterion.reset()
BATCH_SIZE = 20
entropy_criterion.reset()

rounding_factor = 3
BATCH_SIZE = 50
last_batch = [-1.00] * BATCH_SIZE
counter = 0
converged = False
precision_increase = False

cache = triton.runtime.driver.active.get_empty_cache_for_benchmark()

# Adaptive warmup loop with batched synchronization
while not criterion.is_finished():
while True:
remaining = max_samples - counter
batch_size = min(BATCH_SIZE, remaining) if remaining > 0 else BATCH_SIZE

@@ -540,20 +543,41 @@
torch.cuda.synchronize()

for i in range(batch_size):
v = round(batch_start_events[i].elapsed_time(batch_end_events[i]), 3)
criterion.add_measurement(v)
v = round(batch_start_events[i].elapsed_time(batch_end_events[i]), rounding_factor)
last_batch[i] = v

entropy_criterion.add_measurement(v)

if entropy_criterion.is_finished():
converged = True
break

counter += batch_size

if converged:
break

if counter >= max_samples:
break
else:
converged = True

if counter >= 200 and not precision_increase:
stats = entropy_criterion.get_stats()
unique_count = stats.get('unique_measurements', 0)

# Fewer than 20 unique values indicates quantization; increase the rounding precision
if unique_count < 20:
rounding_factor = 4
entropy_criterion.reset()
entropy_criterion.entropy_window_size = 1000

logger.info(f"Quantization detected: only {unique_count} unique measurements. ")
precision_increase = True
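To see why the unique-value count flags quantization: if kernel times sit around ~0.01 ms, rounding to three decimals collapses them onto a few grid points and the entropy signal degenerates. A sketch with assumed timings:

```python
# Assumed timings (~0.01 ms kernel) spread over a ~0.004 ms range.
times = [0.0101 + i * 0.00004 for i in range(100)]
print(len({round(t, 3) for t in times}))  # a handful of unique values
print(len({round(t, 4) for t in times}))  # dozens of unique values
```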


# Log if warmup didn't converge
if not converged:
logger.warning(
f"Entropy warmup did not converge after {counter} samples "
f"Warmup did not converge after {counter} samples "
f"(max_samples={max_samples})"
)
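For reference, the batched-synchronization pattern the loop above relies on: record a start/end CUDA event per run, launch the whole batch, then synchronize once before reading all elapsed times. A minimal sketch; `fn` and `batch_size` are stand-ins, and a CUDA device is assumed:

```python
import torch

def time_batch(fn, batch_size=50):
    starts = [torch.cuda.Event(enable_timing=True) for _ in range(batch_size)]
    ends = [torch.cuda.Event(enable_timing=True) for _ in range(batch_size)]
    for i in range(batch_size):
        starts[i].record()
        fn()
        ends[i].record()
    torch.cuda.synchronize()  # one sync for the whole batch
    return [s.elapsed_time(e) for s, e in zip(starts, ends)]  # times in ms
```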
