Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/guidellm/benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ def from_stats(
],
iter_counts=[req.output_tokens for req in total_with_output_first],
first_iter_counts=[
req.prompt_tokens for req in total_with_output_first
req.prompt_tokens + 1 for req in total_with_output_first
],
),
),
Expand Down
20 changes: 12 additions & 8 deletions src/guidellm/objects/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def from_values(
)

@staticmethod
def from_request_times(
def from_request_times( # noqa: C901
requests: list[tuple[float, float]],
distribution_type: Literal["concurrency", "rate"],
include_cdf: bool = False,
Expand Down Expand Up @@ -248,13 +248,7 @@ def from_request_times(
time_deltas[start] += 1
time_deltas[end] -= 1

# convert to the events over time measuring concurrency changes
events = []
active = 0

for time, delta in sorted(time_deltas.items()):
active += delta
events.append((time, active))
events = list(time_deltas.items())
elif distribution_type == "rate":
# convert to events for when requests finished
global_start = min(start for start, _ in requests) if requests else 0
Expand All @@ -281,6 +275,16 @@ def from_request_times(
else:
flattened_events.append((time, val))

if distribution_type == "concurrency":
# convert to the events over time measuring concurrency changes
events_over_time: list[tuple[float, float]] = []
active = 0
for time, delta in flattened_events:
active += delta # type: ignore [assignment]
events_over_time.append((time, active))

flattened_events = events_over_time

# convert to value distribution function
distribution: dict[float, float] = defaultdict(float)

Expand Down
79 changes: 79 additions & 0 deletions tests/unit/objects/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,3 +704,82 @@ def test_time_running_stats_update():
assert time_running_stats.rate_ms == pytest.approx(
3000 / (time.time() - time_running_stats.start_time), abs=0.1
)


@pytest.mark.regression
def test_distribution_summary_concurrency_double_counting_regression():
    """Regression test guarding against double-counting merged concurrency events.

    Prior to the fix, events merged under the epsilon tolerance had their deltas
    summed without the running active count being accumulated correctly, which
    produced wrong concurrency distributions.

    ### WRITTEN BY AI ###
    """
    eps = 1e-6
    shared_start = 1.0

    # Four requests whose start times coincide (or fall within epsilon of each
    # other) so their start events must be merged, exercising the accumulation
    # path. End times are staggered one second apart.
    end_times = [3.0, 4.0, 5.0]
    reqs = [(shared_start, end) for end in end_times]
    # A start just inside the epsilon window should merge with the others.
    reqs.append((shared_start + eps / 3, 6.0))

    summary = DistributionSummary.from_request_times(
        reqs, distribution_type="concurrency", epsilon=eps
    )

    # Since every request begins together (within epsilon), the expected
    # concurrency timeline is:
    #   t=1.0-3.0 -> 4 active
    #   t=3.0-4.0 -> 3 active
    #   t=4.0-5.0 -> 2 active
    #   t=5.0-6.0 -> 1 active
    assert summary.max == 4.0  # peak: all four overlap at the start
    assert summary.min == 1.0  # trough: a single request remains at the end


@pytest.mark.sanity
def test_distribution_summary_concurrency_epsilon_edge_case():
    """Exercise the epsilon boundary: merged at exactly epsilon, split just past it.

    ### WRITTEN BY AI ###
    """
    eps = 1e-6

    # Case 1: starts exactly epsilon apart -> their events should be merged.
    merged_case = [
        (1.0, 2.0),
        (1.0 + eps, 2.5),  # Exactly epsilon apart
        (2.0, 2.5),  # Another close request
    ]
    merged_summary = DistributionSummary.from_request_times(
        merged_case, distribution_type="concurrency", epsilon=eps
    )
    # With the events merged, concurrency is a constant 2 across the timeline.
    assert merged_summary.max == 2.0
    assert merged_summary.min == 2.0

    # Case 2: starts just beyond epsilon -> events must remain distinct.
    split_case = [
        (1.0, 2.0),
        (1.0 + eps * 1.1, 2.5),  # Just over epsilon apart
        (2.0, 2.5),  # Another close request
    ]
    split_summary = DistributionSummary.from_request_times(
        split_case, distribution_type="concurrency", epsilon=eps
    )
    # Unmerged timeline:
    #   t=1.0         .. 1.0+eps*1.1 -> 1 active
    #   t=1.0+eps*1.1 .. 2.0         -> 2 active
    #   t=2.0         .. 2.5         -> 1 active
    assert split_summary.max == 2.0
    assert split_summary.min == 1.0