@@ -25,7 +25,6 @@
 from ..llmapi import BuildConfig, KvCacheConfig
 from ..llmapi.llm_utils import update_llm_args_with_extra_options
 from ..logger import logger, severity_map
-from ..mapping import CpType
 
 
 @click.group()
@@ -75,10 +74,6 @@
               type=int,
               default=1,
               help='Pipeline parallelism size.')
-@click.option("--cp_size",
-              type=int,
-              default=1,
-              help='Context parallelism size.')
 @click.option("--ep_size",
               type=int,
               default=None,
@@ -110,18 +105,14 @@
               is_flag=True,
               default=False,
               help="Flag for disabling KV cache reuse.")
-@click.option("--cp_config",
-              type=dict,
-              default=None,
-              help="Context parallelism configuration as JSON.")
 @click.pass_context
 def main(ctx, model: str, tokenizer: Optional[str], log_level: str,
          backend: str, max_beam_width: int, max_batch_size: int,
          max_num_tokens: int, max_seq_len: int, tp_size: int, pp_size: int,
          ep_size: Optional[int], gpus_per_node: Optional[int],
          kv_cache_free_gpu_memory_fraction: float, trust_remote_code: bool,
          revision: Optional[str], extra_llm_api_options: Optional[str],
-         disable_kv_cache_reuse: bool, cp_size: int, cp_config: Optional[dict]):
+         disable_kv_cache_reuse: bool):
     logger.set_level(log_level)
     build_config = BuildConfig(max_batch_size=max_batch_size,
                                max_num_tokens=max_num_tokens,
@@ -132,20 +123,11 @@ def main(ctx, model: str, tokenizer: Optional[str], log_level: str,
         free_gpu_memory_fraction=kv_cache_free_gpu_memory_fraction,
         enable_block_reuse=not disable_kv_cache_reuse)
 
-    if cp_config is not None and "cp_type" in cp_config:
-        cp_config = cp_config.copy()
-        try:
-            cp_config["cp_type"] = CpType[cp_config["cp_type"].upper()]
-        except KeyError:
-            raise ValueError(f"Invalid cp_type: {cp_config['cp_type']}. " \
-                             f"Must be one of: {', '.join([t.name for t in CpType])}")
     llm_args = {
         "model": model,
         "tokenizer": tokenizer,
         "tensor_parallel_size": tp_size,
         "pipeline_parallel_size": pp_size,
-        "context_parallel_size": cp_size,
-        "cp_config": cp_config if cp_config is not None else {},
         "moe_expert_parallel_size": ep_size,
         "gpus_per_node": gpus_per_node,
         "trust_remote_code": trust_remote_code,