Skip to content

Commit 1b869cc

Browse files
author
github-actions
committed
Auto-merge updates from master branch
2 parents a0f5136 + 0369dd1 commit 1b869cc

File tree

26 files changed

+1826
-893
lines changed

26 files changed

+1826
-893
lines changed

loadgen/README_BUILD.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
pip install absl-py numpy
1313
git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference
1414
cd mlperf_inference/loadgen
15-
CFLAGS="-std=c++14 -O3" python -m pip install .
15+
python -m pip install .
1616

1717
This will fetch the loadgen source, build and install the loadgen as a python module, and run a simple end-to-end demo.
1818

loadgen/VERSION.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
6.0.1
1+
6.0.2

loadgen/bindings/python_api.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
312312
&TestSettings::server_num_issue_query_threads)
313313
.def_readwrite("offline_expected_qps",
314314
&TestSettings::offline_expected_qps)
315+
.def_readwrite("sample_concatenate_permutation",
316+
&TestSettings::sample_concatenate_permutation)
315317
.def_readwrite("min_duration_ms", &TestSettings::min_duration_ms)
316318
.def_readwrite("max_duration_ms", &TestSettings::max_duration_ms)
317319
.def_readwrite("min_query_count", &TestSettings::min_query_count)
@@ -324,6 +326,14 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
324326
&TestSettings::accuracy_log_rng_seed)
325327
.def_readwrite("accuracy_log_probability",
326328
&TestSettings::accuracy_log_probability)
329+
.def_readwrite("accuracy_log_sampling_target",
330+
&TestSettings::accuracy_log_sampling_target)
331+
.def_readwrite("test05", &TestSettings::test05)
332+
.def_readwrite("test05_qsl_rng_seed", &TestSettings::test05_qsl_rng_seed)
333+
.def_readwrite("test05_sample_index_rng_seed",
334+
&TestSettings::test05_sample_index_rng_seed)
335+
.def_readwrite("test05_schedule_rng_seed",
336+
&TestSettings::test05_schedule_rng_seed)
327337
.def_readwrite("print_timestamps", &TestSettings::print_timestamps)
328338
.def_readwrite("performance_issue_unique",
329339
&TestSettings::performance_issue_unique)
@@ -333,12 +343,6 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
333343
&TestSettings::performance_issue_same_index)
334344
.def_readwrite("performance_sample_count_override",
335345
&TestSettings::performance_sample_count_override)
336-
.def_readwrite("test05", &TestSettings::test05)
337-
.def_readwrite("test05_qsl_rng_seed", &TestSettings::test05_qsl_rng_seed)
338-
.def_readwrite("test05_sample_index_rng_seed",
339-
&TestSettings::test05_sample_index_rng_seed)
340-
.def_readwrite("test05_schedule_rng_seed",
341-
&TestSettings::test05_schedule_rng_seed)
342346
.def_readwrite("use_token_latencies", &TestSettings::use_token_latencies)
343347
.def_readwrite("ttft_latency", &TestSettings::server_ttft_latency)
344348
.def_readwrite("tpot_latency", &TestSettings::server_tpot_latency)

loadgen/mlperf.conf

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pointpainting.*.performance_sample_count_override = 1024
2727
deepseek-r1.*.performance_sample_count_override = 4388
2828
deepseek-r1-interactive.*.performance_sample_count_override = 4388
2929
whisper.*.performance_sample_count_override = 1633
30+
qwen3-vl-235b-a22b.*.performance_sample_count_override = 48289
3031
# set to 0 to let entire sample set to be performance sample
3132
3d-unet.*.performance_sample_count_override = 0
3233

@@ -69,7 +70,7 @@ llama3_1-8b-interactive.*.sample_concatenate_permutation = 1
6970
deepseek-r1.*.sample_concatenate_permutation = 1
7071
deepseek-r1-interactive.*.sample_concatenate_permutation = 1
7172
whisper.*.sample_concatenate_permutation = 1
72-
73+
qwen3-vl-235b-a22b.*.sample_concatenate_permutation = 1
7374
*.Server.target_latency = 10
7475
*.Server.target_latency_percentile = 99
7576
*.Server.target_duration = 0
@@ -94,7 +95,9 @@ llama3_1-8b-interactive.*.use_token_latencies = 1
9495
deepseek-r1.*.use_token_latencies = 1
9596
deepseek-r1-interactive.*.use_token_latencies = 1
9697
whisper.*.use_token_latencies = 1
97-
98+
# For the VLM benchmark, the model response is relatively short, therefore we track
99+
# end-to-end latency instead of token latencies.
100+
qwen3-vl-235b-a22b.*.use_token_latencies = 0
98101
# gptj benchmark infers token latencies
99102
gptj.*.infer_token_latencies = 1
100103
gptj.*.token_latency_scaling_factor = 69
@@ -140,6 +143,8 @@ deepseek-r1-interactive.Server.target_latency = 0
140143
deepseek-r1-interactive.Server.ttft_latency = 1500
141144
deepseek-r1-interactive.Server.tpot_latency = 15
142145

146+
qwen3-vl-235b-a22b.Server.target_latency = 12000
147+
143148
*.Offline.target_latency_percentile = 90
144149
*.Offline.min_duration = 600000
145150

@@ -164,6 +169,7 @@ mixtral-8x7b.Offline.min_query_count = 15000
164169
rgat.Offline.min_query_count = 788379
165170
deepseek-r1.Offline.min_query_count = 4388
166171
whisper.Offline.min_query_count = 1633
172+
qwen3-vl-235b-a22b.Offline.min_query_count = 48289
167173

168174
# These fields should be defined and overridden by user.conf.
169175
*.SingleStream.target_latency = 10

loadgen/setup.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
# and binaries. Use one of the gn build targets instead if you want
2525
# to avoid polluting the source tree.
2626

27-
from setuptools import Extension, setup
2827
from pathlib import Path
28+
2929
from pybind11 import get_include
3030
from pybind11.setup_helpers import Pybind11Extension, build_ext
31+
from setuptools import setup
3132
from version_generator import generate_loadgen_version_definitions
32-
import subprocess
3333

3434
generated_version_source_filename = "generated/version_generated.cc"
3535
generate_loadgen_version_definitions(generated_version_source_filename, ".")
@@ -42,7 +42,7 @@
4242
"test_settings.h",
4343
"issue_query_controller.h",
4444
"early_stopping.h",
45-
"query_dispatch_library.h"
45+
"query_dispatch_library.h",
4646
]
4747

4848
lib_headers = [
@@ -54,7 +54,7 @@
5454
"results.h",
5555
"bindings/c_api.h",
5656
"version_generator.py",
57-
"mlperf_conf.h"
57+
"mlperf_conf.h",
5858
]
5959

6060
lib_sources = [
@@ -93,18 +93,18 @@
9393

9494

9595
try:
96-
with open("mlperf.conf", 'r') as file:
96+
with open("mlperf.conf", "r") as file:
9797
conf_contents = file.read()
9898

9999
# Escape backslashes and double quotes
100-
conf_contents = conf_contents.replace('\\', '\\\\').replace('"', '\\"')
100+
conf_contents = conf_contents.replace("\\", "\\\\").replace('"', '\\"')
101101

102102
# Convert newlines
103-
conf_contents = conf_contents.replace('\n', '\\n"\n"')
103+
conf_contents = conf_contents.replace("\n", '\\n"\n"')
104104

105105
formatted_content = f'const char* mlperf_conf =\n"{conf_contents}";\n'
106106

107-
with open("mlperf_conf.h", 'w') as header_file:
107+
with open("mlperf_conf.h", "w") as header_file:
108108
header_file.write(formatted_content)
109109

110110
except IOError as e:
@@ -113,24 +113,25 @@
113113
mlperf_loadgen_module = Pybind11Extension(
114114
"mlperf_loadgen",
115115
define_macros=[
116-
("MAJOR_VERSION",
117-
version_split[0]),
118-
("MINOR_VERSION",
119-
version_split[1])
116+
("MAJOR_VERSION", version_split[0]),
117+
("MINOR_VERSION", version_split[1]),
120118
],
121119
include_dirs=[".", get_include()],
122120
sources=mlperf_loadgen_sources,
123121
depends=mlperf_loadgen_headers,
122+
extra_compile_args=["-std=c++14", "-O3"],
124123
)
125124

126-
setup(name="mlcommons_loadgen",
127-
version=version,
128-
description="MLPerf Inference LoadGen python bindings",
129-
url="https://mlcommons.org/",
130-
cmdclass={"build_ext": build_ext},
131-
ext_modules=[mlperf_loadgen_module],
132-
packages=['mlcommons_loadgen'],
133-
package_dir={'mlcommons_loadgen': '.'},
134-
include_package_data=True,
135-
long_description=mlperf_long_description,
136-
long_description_content_type='text/markdown')
125+
setup(
126+
name="mlcommons_loadgen",
127+
version=version,
128+
description="MLPerf Inference LoadGen python bindings",
129+
url="https://mlcommons.org/",
130+
cmdclass={"build_ext": build_ext},
131+
ext_modules=[mlperf_loadgen_module],
132+
packages=["mlcommons_loadgen"],
133+
package_dir={"mlcommons_loadgen": "."},
134+
include_package_data=True,
135+
long_description=mlperf_long_description,
136+
long_description_content_type="text/markdown",
137+
)

loadgen/test_settings.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,6 @@ struct TestSettings {
234234
uint64_t test05_qsl_rng_seed = 0;
235235
uint64_t test05_sample_index_rng_seed = 0;
236236
uint64_t test05_schedule_rng_seed = 0;
237-
238-
/// \brief Load mlperf parameter config from file.
239-
int FromConfig(const std::string &path, const std::string &model,
240-
const std::string &scenario, int conf_type = 1);
241237
/**@}*/
242238

243239
// ==================================
@@ -272,6 +268,10 @@ struct TestSettings {
272268
bool infer_token_latencies = false;
273269
uint64_t token_latency_scaling_factor;
274270
/**@}*/
271+
272+
/// \brief Load mlperf parameter config from file.
273+
int FromConfig(const std::string &path, const std::string &model,
274+
const std::string &scenario, int conf_type = 1);
275275
};
276276

277277
///

0 commit comments

Comments
 (0)