Skip to content

Commit 897894d

Browse files
committed
Merge branch 'wangshangsam/fix-req-timeout' of github.com:CentML/mlperf-inference into wangshangsam/fix-req-timeout
2 parents f9d983f + 5370ecd commit 897894d

File tree

2 files changed: 15 additions and 7 deletions

2 files changed: 15 additions and 7 deletions

multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/cli.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,9 @@ def _run_benchmark(
8484
test_settings, log_settings = settings.to_lgtype()
8585
logger.info("Running VL2L benchmark with settings: {}", settings)
8686
logger.info("Running VL2L benchmark with dataset: {}", dataset)
87-
logger.info("Running VL2L benchmark with OpenAI API endpoint: {}", endpoint)
87+
logger.info(
88+
"Running VL2L benchmark with OpenAI API endpoint: {}",
89+
endpoint)
8890
logger.info("Running VL2L benchmark with random seed: {}", random_seed)
8991
task = ShopifyGlobalCatalogue(
9092
dataset=dataset,

multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/schema.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -366,7 +366,8 @@ class TestSettings(BaseModelWithAttributeDescriptionsFromDocstrings):
366366
mode="before",
367367
)
368368
@classmethod
369-
def parse_timedelta(cls, value: timedelta | float | str) -> timedelta | str:
369+
def parse_timedelta(cls, value: timedelta | float |
370+
str) -> timedelta | str:
370371
"""Parse timedelta from seconds (int/float/str) or ISO 8601 format."""
371372
if isinstance(value, timedelta):
372373
return value
@@ -411,8 +412,10 @@ def to_lgtype(self) -> lg.TestSettings:
411412
settings.sample_concatenate_permutation = self.sample_concatenate_permutation
412413

413414
# Test duration settings
414-
settings.min_duration_ms = round(self.min_duration.total_seconds() * 1000)
415-
settings.max_duration_ms = round(self.max_duration.total_seconds() * 1000)
415+
settings.min_duration_ms = round(
416+
self.min_duration.total_seconds() * 1000)
417+
settings.max_duration_ms = round(
418+
self.max_duration.total_seconds() * 1000)
416419
settings.min_query_count = self.min_query_count
417420
settings.max_query_count = self.max_query_count
418421

@@ -439,8 +442,10 @@ def to_lgtype(self) -> lg.TestSettings:
439442
self.performance_sample_count_override
440443
)
441444
settings.use_token_latencies = self.use_token_latencies
442-
settings.ttft_latency = round(self.server_ttft_latency.total_seconds() * 1e9)
443-
settings.tpot_latency = round(self.server_tpot_latency.total_seconds() * 1e9)
445+
settings.ttft_latency = round(
446+
self.server_ttft_latency.total_seconds() * 1e9)
447+
settings.tpot_latency = round(
448+
self.server_tpot_latency.total_seconds() * 1e9)
444449
settings.infer_token_latencies = self.infer_token_latencies
445450
settings.token_latency_scaling_factor = self.token_latency_scaling_factor
446451

@@ -789,5 +794,6 @@ def ensure_content_is_list(
789794
== "pydantic_core._pydantic_core"
790795
and message["content"].__class__.__name__ == "ValidatorIterator"
791796
):
792-
message["content"] = list(message["content"]) # type: ignore[arg-type]
797+
message["content"] = list(
798+
message["content"]) # type: ignore[arg-type]
793799
return messages

0 commit comments

Comments
 (0)