@@ -27,16 +27,21 @@ DEFINE_string(host, "", "Host name for brpc server.");
2727
2828DEFINE_int32 (port, 8010 , " Port for brpc server." );
2929
30- DEFINE_int32 (idle_timeout_s,
30+ DEFINE_int32 (
31+ rpc_idle_timeout_s,
32+ -1 ,
33+ " Connection will be closed if there is no read/write operations "
34+ " during the last `rpc_idle_timeout_s`. -1 means wait indefinitely." );
35+
36+ DEFINE_int32 (rpc_channel_timeout_ms,
3137 -1 ,
32- " Connection will be closed if there is no read/write operations "
33- " during the last `idle_timeout_s`. -1 means wait indefinitely." );
38+ " Max duration of bRPC Channel. -1 means wait indefinitely." );
3439
35- DEFINE_int32 (num_threads, 32 , " Number of threads to process requests." );
40+ DEFINE_int32 (max_reconnect_count,
41+ 40 ,
42+ " The max count for worker try to connect to server." );
3643
37- DEFINE_int32 (max_concurrency,
38- 0 ,
39- " Limit number of requests processed in parallel." );
44+ DEFINE_int32 (num_threads, 32 , " Number of threads to process requests." );
4045
4146DEFINE_int32 (
4247 max_concurrent_requests,
@@ -74,11 +79,13 @@ DEFINE_bool(enable_mla,
7479 false ,
7580 " Whether to enable multi-head latent attention." );
7681
82+ // --- graph mode execution config ---
83+
7784DEFINE_bool (enable_acl_graph,
7885 false ,
7986 " Whether to enable ACL graph execution for decode phase." );
8087
81- DEFINE_int32 (max_tokens_per_seq ,
88+ DEFINE_int32 (max_seq_len_for_graph_mode ,
8289 20480 ,
8390 " Maximum number of tokens per sequence for ACL graph execution." );
8491
@@ -91,11 +98,13 @@ DEFINE_int32(limit_image_per_prompt,
9198
9299// --- threading config ---
93100
94- DEFINE_int32 (num_handling_threads, 4 , " Number of handling threads." );
101+ DEFINE_int32 (num_request_handling_threads,
102+ 4 ,
103+ " Number of threads for handling input requests." );
95104
96105DEFINE_int32 (num_response_handling_threads,
97106 4 ,
98- " Number of response handling threads ." );
107+ " Number of threads for handling responses ." );
99108
100109// --- kvcache config ---
101110
@@ -141,9 +150,10 @@ DEFINE_bool(use_zero_evict,
141150 false ,
142151 " Use ZeroEvictionScheduler but ContinuousScheduler." );
143152
144- DEFINE_int32 (max_decode_token_per_sequence,
145- 256 ,
146- " Max decode token per sequence." );
153+ DEFINE_int32 (
154+ max_decode_token_per_sequence,
155+ 256 ,
156+ " Max decode token per sequence which used for ZeroEvictionScheduler." );
147157
148158// --- parallel config ---
149159
@@ -168,10 +178,10 @@ DEFINE_int64(eplb_update_interval, 1000, "EPLB update rate.");
168178
169179DEFINE_double (eplb_update_threshold, 0.8 , " EPLB update threshold." );
170180
171- DEFINE_string (rank_tablefile, " " , " ATB HCCL rank table file." );
172-
173181DEFINE_int32 (expert_parallel_degree, 0 , " Expert parallel degree." );
174182
183+ DEFINE_string (rank_tablefile, " " , " ATB HCCL rank table file." );
184+
175185// --- profile config ---
176186
177187DEFINE_bool (enable_profile_step_time,
@@ -261,20 +271,8 @@ DEFINE_string(kv_cache_transfer_mode,
261271 " PUSH" ,
262272 " The mode of kv cache transfer(e.g. PUSH, PULL)." );
263273
264- DEFINE_string (device_ip, " " , " The device ip." );
265-
266274DEFINE_int32 (transfer_listen_port, 26000 , " The KVCacheTranfer listen port." );
267275
268- // --- worker server config ---
269-
270- DEFINE_int32 (max_connect_count,
271- 40 ,
272- " The max count for worker try to connect to server." );
273-
274- DEFINE_int32 (sleep_time_second,
275- 3 ,
276- " The sleep time for worker try to connect to server next time." );
277-
278276DEFINE_bool (enable_shm,
279277 true ,
280278 " Whether to enable shared memory for executing model." );
@@ -311,10 +309,6 @@ DEFINE_double(heart_beat_interval, 0.5, "Heart beat interval.");
311309
312310DEFINE_int32 (etcd_ttl, 3 , " Time to live for etcd." );
313311
314- DEFINE_int32 (timeout_ms,
315- -1 ,
316- " Max duration of bRPC Channel. -1 means wait indefinitely." );
317-
318312// --- priority strategy config ---
319313
320314DEFINE_string (priority_strategy,
@@ -354,7 +348,7 @@ DEFINE_bool(
354348 " Whether to enable computation communication parallel by two streams "
355349 " and two micro batches in prefill stage." );
356350
357- DEFINE_int32 (default_micro_batch_num ,
351+ DEFINE_int32 (micro_batch_num ,
358352 2 ,
359353 " Default use two micro batches for multi-stream parallel." );
360354
@@ -368,7 +362,7 @@ DEFINE_bool(enable_continuous_kvcache,
368362 " Whether to enable continuous kv cache." );
369363
370364DEFINE_int64 (
371- granularity_size ,
365+ phy_page_granularity_size ,
372366 2 * 1024 * 1024 ,
373367 " Granularity size for one physical page in bytes, default 2MB, when enable "
374368 " continuous kv cache." );
@@ -388,4 +382,4 @@ DEFINE_bool(enable_beam_search_kernel,
388382 " Whether to enable beam search kernel." );
389383
390384// --- qwen3 reranker config
391- DEFINE_bool (enable_qwen3_reranker, false , " Whether to enable qwen3 reranker." );
385+ DEFINE_bool (enable_qwen3_reranker, false , " Whether to enable qwen3 reranker." );
0 commit comments