We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent cc84132 · commit a0474b3 · Copy full SHA for a0474b3
tests/functional/grpo_multiturn.sh
@@ -29,7 +29,7 @@ uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJE
29
policy.max_total_sequence_length=1024 \
30
policy.train_global_batch_size=4 \
31
policy.train_micro_batch_size=1 \
32
- policy.generation.top_p=0.99 \
+ policy.generation.top_p=0.999 \
33
policy.generation.top_k=8000 \
34
logger.tensorboard_enabled=true \
35
logger.log_dir=$LOG_DIR \
0 commit comments