Commit 1fce978

16384
Signed-off-by: Terry Kong <terryk@nvidia.com>
1 parent: 45d0ad3

2 files changed (+2, -2 lines)

examples/configs/grpo_math_1B.yaml

Lines changed: 1 addition & 1 deletion
@@ -224,7 +224,7 @@ policy:
   use_cuda_graphs_for_non_decode_steps: true # Enable CUDA graphs for prefill/context processing
   enable_chunked_prefill: true # Split long prefills into chunks for better memory management
   unified_memory_level: 0 # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
-  max_tokens: 16834 # Maximum number of tokens to use in a single step. Analogous to vllm's max_num_batched_tokens
+  max_tokens: 16384 # Maximum number of tokens to use in a single step. Analogous to vllm's max_num_batched_tokens
   vllm_cfg:
     async_engine: false
     precision: ${policy.precision}

examples/configs/grpo_math_1B_megatron.yaml

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ policy:
   use_cuda_graphs_for_non_decode_steps: true # Enable CUDA graphs for prefill/context processing
   enable_chunked_prefill: true # Split long prefills into chunks for better memory management
   unified_memory_level: 0 # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
-  max_tokens: 16834 # Maximum number of tokens to use in a single step. Analogous to vllm's max_num_batched_tokens
+  max_tokens: 16384 # Maximum number of tokens to use in a single step. Analogous to vllm's max_num_batched_tokens

   vllm_cfg:
     tensor_parallel_size: 1
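
The corrected value, 16384 (2^14), replaces 16834, which looks like a simple digit transposition. As the inline comment notes, this config's max_tokens plays the same role as vLLM's max_num_batched_tokens: the per-step token budget shared by all sequences batched together. Below is a minimal sketch of the analogous setting expressed directly as vLLM engine arguments; it assumes vLLM's offline LLM entry point, and the model name is an illustrative placeholder rather than anything taken from these configs.

# Sketch only: expressing the same per-step token budget as vLLM engine
# arguments. Assumes vLLM's offline `LLM` entry point; the model name is
# an illustrative placeholder, not one used by these configs.
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-1.5B-Instruct",  # placeholder model
    enable_chunked_prefill=True,         # mirrors enable_chunked_prefill in the YAML above
    max_num_batched_tokens=16384,        # mirrors the corrected max_tokens value (2**14)
)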
