Skip to content

测试代码中的训练问题 #319

@JiangWu0826

Description

@JiangWu0826
# Trinity-RFT GRPO training config for GSM8K with Qwen2.5-1.5B-Instruct.
project: "Trinity-RFT-gsm8k"
name: "qwen2.5-1.5B-gsm8k"
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
algorithm:
  algorithm_type: grpo
  repeat_times: 8  # GRPO group size: rollouts sampled per prompt
model:
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
  max_response_tokens: 1024
  # NOTE(review): max_model_len - max_response_tokens leaves only 256 tokens
  # for the prompt; long GSM8K questions may be truncated — confirm against
  # the tokenized prompt lengths before training.
  max_model_len: 1280
cluster:
  node_num: 1
  gpu_per_node: 8  # 8 GPUs per node
buffer:
  total_epochs: 1
  batch_size: 96
  explorer_input:
    taskset:
      name: gsm8k
      storage_type: file
      path: 'openai/gsm8k'
      subset_name: 'main'
      split: 'train'
      format:
        prompt_key: 'question'
        response_key: 'answer'
      rollout_args:
        temperature: 1.0
    eval_tasksets:
    - name: gsm8k-eval
      storage_type: file
      path: 'openai/gsm8k'
      subset_name: 'main'
      split: 'test'
      format:
        prompt_key: 'question'
        response_key: 'answer'
    default_workflow_type: 'math_workflow'  # workflow that computes the math reward
  trainer_input:
    experience_buffer:
      name: gsm8k_buffer
      storage_type: queue
      path: 'sqlite:///gsm8k.db'
explorer:
  eval_interval: 50
  runner_per_model: 8
  rollout_model:
    engine_num: 4  # 4 vLLM rollout engines (stale comment previously said 2)
    tensor_parallel_size: 1
    enable_prefix_caching: false
    enforce_eager: true
    dtype: bfloat16
    seed: 42
synchronizer:
  sync_method: 'nccl'
  sync_interval: 1
  sync_timeout: 1200
trainer:
  trainer_type: 'verl'
  save_interval: 100
  trainer_config:
    actor_rollout_ref:
      model:
        use_remove_padding: true
      actor:
        use_dynamic_bsz: true
        ppo_max_token_len_per_gpu: 16384
        ulysses_sequence_parallel_size: 1
        optim:
          lr: 1e-5
      ref:
        # Keep ref-model settings in lockstep with the actor via interpolation.
        log_prob_use_dynamic_bsz: ${trainer.trainer_config.actor_rollout_ref.actor.use_dynamic_bsz}
        log_prob_max_token_len_per_gpu: ${trainer.trainer_config.actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
        ulysses_sequence_parallel_size: ${trainer.trainer_config.actor_rollout_ref.actor.ulysses_sequence_parallel_size}  # sp size

开发者您好,trinity run --config examples/grpo_gsm8k/gsm8k.yaml,我在运行上面的示例代码中,使用您这边提供的默认参数,但是遇到了No more samples to train. Stopping training.请问这个怎么解决呢。我的GPUs是8张46GB的A40。
Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions