
Commit f61067c

[None][chore] Defer exposing context parallel configs (#9552)
Signed-off-by: Balaram Buddharaju <[email protected]>
1 parent f155812 commit f61067c

File tree

3 files changed: +1, -31 lines


tensorrt_llm/bench/benchmark/low_latency.py (0 additions, 6 deletions)

@@ -121,12 +121,6 @@
     default=1,
     help="pipeline parallelism size",
 )
-@optgroup.option(
-    "--cp",
-    type=int,
-    default=1,
-    help="context parallelism size",
-)
 @optgroup.option(
     "--ep",
     type=int,
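For context, the benchmark entry points declare their parallel-mapping flags with the click-option-group package, and this change simply drops the --cp entry from that group. A minimal, self-contained sketch of the pattern after the removal; the group title and the parallelism_demo command are illustrative, not the real trtllm-bench code:

# Sketch only: mirrors the @optgroup.option pattern used in the benchmark
# CLIs once --cp is gone. "Parallelism" and parallelism_demo are made up.
import click
from click_option_group import optgroup


@click.command()
@optgroup.group("Parallelism")
@optgroup.option("--pp", type=int, default=1, help="pipeline parallelism size")
@optgroup.option("--ep", type=int, default=1, help="expert parallelism size")
def parallelism_demo(pp: int, ep: int) -> None:
    """Echo the parallel mapping that would be handed to the engine."""
    click.echo(f"pp={pp} ep={ep}")


if __name__ == "__main__":
    parallelism_demo()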

tensorrt_llm/bench/benchmark/throughput.py (0 additions, 6 deletions)

@@ -202,12 +202,6 @@
     default=1,
     help="pipeline parallelism size",
 )
-@optgroup.option(
-    "--cp",
-    type=int,
-    default=1,
-    help="context parallelism size",
-)
 @optgroup.option(
     "--ep",
     type=int,
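One quick way to confirm a flag is no longer exposed is click's built-in test runner, which reports an unknown-option error without spawning a process. A minimal sketch, with demo_cmd standing in for the real benchmark command:

# Sketch only: demo_cmd is a stand-in command, not the trtllm-bench CLI.
import click
from click.testing import CliRunner


@click.command()
@click.option("--pp", type=int, default=1, help="pipeline parallelism size")
def demo_cmd(pp: int) -> None:
    click.echo(f"pp={pp}")


runner = CliRunner()

# The removed flag is rejected as an unknown option (usage error).
result = runner.invoke(demo_cmd, ["--cp", "2"])
assert result.exit_code != 0

# Options that are still declared keep working.
result = runner.invoke(demo_cmd, ["--pp", "2"])
assert result.exit_code == 0 and result.output.strip() == "pp=2"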

tensorrt_llm/commands/eval.py (1 addition, 19 deletions)

@@ -25,7 +25,6 @@
 from ..llmapi import BuildConfig, KvCacheConfig
 from ..llmapi.llm_utils import update_llm_args_with_extra_options
 from ..logger import logger, severity_map
-from ..mapping import CpType


 @click.group()
@@ -75,10 +74,6 @@
               type=int,
               default=1,
               help='Pipeline parallelism size.')
-@click.option("--cp_size",
-              type=int,
-              default=1,
-              help='Context parallelism size.')
 @click.option("--ep_size",
               type=int,
               default=None,
@@ -110,18 +105,14 @@
               is_flag=True,
               default=False,
               help="Flag for disabling KV cache reuse.")
-@click.option("--cp_config",
-              type=dict,
-              default=None,
-              help="Context parallelism configuration as JSON.")
 @click.pass_context
 def main(ctx, model: str, tokenizer: Optional[str], log_level: str,
          backend: str, max_beam_width: int, max_batch_size: int,
          max_num_tokens: int, max_seq_len: int, tp_size: int, pp_size: int,
          ep_size: Optional[int], gpus_per_node: Optional[int],
          kv_cache_free_gpu_memory_fraction: float, trust_remote_code: bool,
          revision: Optional[str], extra_llm_api_options: Optional[str],
-         disable_kv_cache_reuse: bool, cp_size: int, cp_config: Optional[dict]):
+         disable_kv_cache_reuse: bool):
     logger.set_level(log_level)
     build_config = BuildConfig(max_batch_size=max_batch_size,
                                max_num_tokens=max_num_tokens,
@@ -132,20 +123,11 @@ def main(ctx, model: str, tokenizer: Optional[str], log_level: str,
         free_gpu_memory_fraction=kv_cache_free_gpu_memory_fraction,
         enable_block_reuse=not disable_kv_cache_reuse)

-    if cp_config is not None and "cp_type" in cp_config:
-        cp_config = cp_config.copy()
-        try:
-            cp_config["cp_type"] = CpType[cp_config["cp_type"].upper()]
-        except KeyError:
-            raise ValueError(f"Invalid cp_type: {cp_config['cp_type']}. " \
-                             f"Must be one of: {', '.join([t.name for t in CpType])}")
     llm_args = {
         "model": model,
         "tokenizer": tokenizer,
         "tensor_parallel_size": tp_size,
         "pipeline_parallel_size": pp_size,
-        "context_parallel_size": cp_size,
-        "cp_config": cp_config if cp_config is not None else {},
         "moe_expert_parallel_size": ep_size,
         "gpus_per_node": gpus_per_node,
         "trust_remote_code": trust_remote_code,

0 commit comments