Skip to content

Commit 70317dc

Browse files
authored
Merge branch 'main' into feat/autotuner_distribute_tuning_part_2
2 parents 03b3756 + 5bc7ffe commit 70317dc

File tree

49 files changed

+1506
-621
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1506
-621
lines changed

examples/disaggregated/slurm/benchmark/submit.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -294,14 +294,14 @@ def submit_job(config, log_dir, dry_run):
294294
env_config['model_path'],
295295
str(allocation["port"]),
296296
benchmark_config['mode'],
297-
benchmark_config['concurrency_list'],
297+
f"'{benchmark_config['concurrency_list']}'",
298298
str(slurm_config['numa_bind']).lower(),
299299
log_dir,
300300
str(profiling_config['nsys_on']).lower(),
301-
profiling_config['gen_profile_range'] if server_type == "GEN"
302-
else profiling_config['ctx_profile_range'],
301+
f"'{profiling_config['gen_profile_range']}'" if server_type
302+
== "GEN" else f"'{profiling_config['ctx_profile_range']}'",
303303
gen_config_path if server_type == "GEN" else ctx_config_path,
304-
f'"{cur_worker_env_var}"',
304+
f"'{cur_worker_env_var}'",
305305
f"&> {log_dir}/3_output_{server_type}_{server_id}.log &",
306306
]
307307
start_server_cmds.append(" ".join(cmd))
@@ -343,21 +343,21 @@ def submit_job(config, log_dir, dry_run):
343343
if benchmark_config['use_nv_sa_benchmark']:
344344
benchmark_cmd = [
345345
f"bash {env_config['work_dir']}/run_benchmark_nv_sa.sh",
346-
f"{env_config['model_path']} {isl} {osl} {benchmark_config['benchmark_ratio']} {benchmark_config['multi_round']} {gen_num} {benchmark_config['concurrency_list']} {benchmark_config['streaming']} {log_dir} {disagg_server_hostname} {disagg_server_port}",
346+
f"'{env_config['model_path']}' {isl} {osl} {benchmark_config['benchmark_ratio']} {benchmark_config['multi_round']} {gen_num} '{benchmark_config['concurrency_list']}' {benchmark_config['streaming']} '{log_dir}' {disagg_server_hostname} {disagg_server_port}",
347347
f"&> {log_dir}/6_bench.log"
348348
]
349349
client_cmds.append(" ".join(client_slurm_prefix + benchmark_cmd))
350350
else:
351351
benchmark_cmd = [
352352
f"bash {env_config['work_dir']}/run_benchmark.sh",
353-
f"{env_config['model_path']} {benchmark_config['dataset_file']} {benchmark_config['multi_round']} {gen_num} {benchmark_config['concurrency_list']} {benchmark_config['streaming']} {log_dir} {disagg_server_hostname} {disagg_server_port}",
353+
f"'{env_config['model_path']}' '{benchmark_config['dataset_file']}' {benchmark_config['multi_round']} {gen_num} '{benchmark_config['concurrency_list']}' {benchmark_config['streaming']} '{log_dir}' {disagg_server_hostname} {disagg_server_port}",
354354
f"&> {log_dir}/6_bench.log"
355355
]
356356
client_cmds.append(" ".join(client_slurm_prefix + benchmark_cmd))
357357
if config['accuracy']['enable_accuracy_test']:
358358
accuracy_cmd = [
359359
f"bash {env_config['work_dir']}/accuracy_eval.sh",
360-
f"{log_dir} {config['accuracy']['model']} {config['accuracy']['tasks']} {env_config['model_path']} {config['accuracy']['model_args_extra']} {log_dir}/accuracy_eval {disagg_server_hostname} {disagg_server_port}",
360+
f"'{log_dir}' '{config['accuracy']['model']}' '{config['accuracy']['tasks']}' '{env_config['model_path']}' '{config['accuracy']['model_args_extra']}' '{log_dir}/accuracy_eval' {disagg_server_hostname} {disagg_server_port}",
361361
f"&> {log_dir}/7_accuracy_eval.log"
362362
]
363363
client_cmds.append(" ".join(client_slurm_prefix + accuracy_cmd))

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ triton==3.5.0
6969
tiktoken
7070
blobfile
7171
openai-harmony==0.0.4
72-
nvidia-cutlass-dsl==4.3.1; python_version >= "3.10"
72+
nvidia-cutlass-dsl==4.3.4; python_version >= "3.10"
7373
plotly
7474
numexpr<2.14.0 # WAR for attempted use of nonexistent numpy.typing
7575
partial_json_parser

security_scanning/examples/models/contrib/grok/poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/models/core/qwen/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/models/core/qwenvl/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/metadata.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"commit_hash": "066b6539407405bfd33cde628c00a67b88d4d270",
3-
"timestamp": "2025-12-22T02:39:59Z"
2+
"commit_hash": "1e82ff7a0c5a0b525eb22d57e48efb21fc79087e",
3+
"timestamp": "2025-12-23T02:42:08Z"
44
}

security_scanning/poetry.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ triton = "3.5.0"
7272
tiktoken = "^0.12.0"
7373
blobfile = "^3.1.0"
7474
openai-harmony = "0.0.4"
75-
nvidia-cutlass-dsl = "4.3.1"
75+
nvidia-cutlass-dsl = "4.3.4"
7676
plotly = "^6.5.0"
7777
numexpr = "<2.14.0"
7878
partial-json-parser = "^0.2.1.1.post7"

security_scanning/tests/integration/defs/perf/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tensorrt_llm/_torch/auto_deploy/config/default.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ transforms:
8181
sharding_source: ['manual', 'factory', 'heuristic']
8282
support_partial_config: true
8383
sharding_dims: ['tp', 'ep', 'bmm']
84+
shard_all_unprocessed: true
8485
allreduce_strategy: 'NCCL'
8586
dist_backend: auto
8687
requires_shape_prop: true

0 commit comments

Comments (0)