Skip to content

Commit a8c8b2e

Browse files
viraatc and claude
committed
fix: address fourth round of PR review comments
- Fix misleading log messages after early stop (now distinguishes "aborted early" from "all samples issued") - Fix monkeypatch raising=False for cross-platform sched_getaffinity - Fix docstring: 2 CPUs → 4 CPUs to match actual test setup - Add thread-safety note to _parallel_batch_tokenize docstring Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 93627da commit a8c8b2e

File tree

3 files changed

+14
-4
lines changed

3 files changed

+14
-4
lines changed

src/inference_endpoint/load_generator/session.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,10 @@ def _run_test(
106106
EventRecorder.record_event(
107107
SessionEvent.STOP_PERFORMANCE_TRACKING, time.monotonic_ns()
108108
)
109-
self.logger.info("All performance samples issued")
109+
if self.stop_requested:
110+
self.logger.info("Performance sample issuance aborted early")
111+
else:
112+
self.logger.info("All performance samples issued")
110113

111114
if accuracy_test_generators and not self.stop_requested:
112115
for _, generator in accuracy_test_generators.items():
@@ -116,7 +119,10 @@ def _run_test(
116119
if self.stop_requested:
117120
break
118121

119-
self.logger.info("All accuracy samples issued")
122+
if self.stop_requested:
123+
self.logger.info("Accuracy sample issuance aborted early")
124+
else:
125+
self.logger.info("All accuracy samples issued")
120126

121127
self.event_recorder.should_check_idle = True
122128
EventRecorder.record_event(

src/inference_endpoint/metrics/reporter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def _parallel_batch_tokenize(tokenizer: Tokenizer, texts: list[str]) -> list[int
4747
Uses a ThreadPoolExecutor to parallelize across ~95% of CPU cores.
4848
HuggingFace tokenizers use a Rust backend that releases the GIL,
4949
so threads achieve real parallelism without GIL contention.
50+
A single tokenizer instance is shared across threads — this is safe for
51+
PreTrainedTokenizerFast (Rust-backed, thread-safe by design).
5052
"""
5153

5254
try:

tests/unit/metrics/test_reporter.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,12 +1186,14 @@ def test_empty_data(self):
11861186
def test_parallel_batch_tokenize_threaded_path(tokenizer, monkeypatch):
11871187
"""Exercise the threaded branch of _parallel_batch_tokenize.
11881188
1189-
Monkeypatches os.sched_getaffinity to return 2 CPUs so the threaded path
1189+
Monkeypatches os.sched_getaffinity to return 4 CPUs so the threaded path
11901190
triggers with a modest number of texts, and verifies ordering and counts.
11911191
"""
11921192
# Force 4 CPUs so n_workers=3, then provide 5 texts to exceed the
11931193
# direct-tokenize threshold and exercise the threaded chunking path.
1194-
monkeypatch.setattr(os, "sched_getaffinity", lambda _pid: {0, 1, 2, 3})
1194+
monkeypatch.setattr(
1195+
os, "sched_getaffinity", lambda _pid: {0, 1, 2, 3}, raising=False
1196+
)
11951197
texts = ["hello", "ab", "xyz", "a", "test!"]
11961198
result = _parallel_batch_tokenize(tokenizer, texts)
11971199
# CharacterTokenizer returns len(text) as token count

0 commit comments

Comments
 (0)