diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index 764ff2f6f4..e448d1cfd1 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -24,19 +24,23 @@ jobs:
         working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
         run: |
           export UID
-          export GID
+          export GID=$(id -g)
           docker compose up -d
           sleep 15s
       - name: Check ray status
         working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
         run: |
+          export UID
+          export GID=$(id -g)
           docker compose exec trinity-node-1 ray status
           docker compose exec trinity-node-2 ray status
       - name: Run unittest
         working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
         run: |
+          export UID
+          export GID=$(id -g)
           docker compose exec trinity-node-1 pytest tests --ignore=tests/data --ctrf report.json
         continue-on-error: true
diff --git a/trinity/buffer/reader/sql_reader.py b/trinity/buffer/reader/sql_reader.py
index 2fac7416d2..d7a826fdfa 100644
--- a/trinity/buffer/reader/sql_reader.py
+++ b/trinity/buffer/reader/sql_reader.py
@@ -74,6 +74,6 @@ def read(self, strategy: Optional[ReadStrategy] = None) -> List:
         exp_list.extend([self.table_model_cls.to_experience(exp) for exp in experiences])
         logger.info(f"get {len(exp_list)} experiences:")
         logger.info(f"reward = {[exp.reward for exp in exp_list]}")
-        logger.info(f"fisrt prompt_text = {exp_list[0].prompt_text}")
+        logger.info(f"first prompt_text = {exp_list[0].prompt_text}")
         logger.info(f"first response_text = {exp_list[0].response_text}")
         return exp_list
diff --git a/trinity/common/verl_config.py b/trinity/common/verl_config.py
index 4e85fdb5bc..a9d7dd6cb4 100644
--- a/trinity/common/verl_config.py
+++ b/trinity/common/verl_config.py
@@ -68,6 +68,7 @@ class Checkpoint:
 class Actor:
     strategy: str = "fsdp"
     ppo_mini_batch_size: int = 256
+    ppo_micro_batch_size: Optional[int] = None
     ppo_micro_batch_size_per_gpu: int = 1
     use_dynamic_bsz: bool = False
     ppo_max_token_len_per_gpu: int = (
@@ -94,6 +95,7 @@ class Actor:
 @dataclass
 class Ref:
     fsdp_config: FSDPConfig = field(default_factory=FSDPConfig)
+    log_prob_micro_batch_size: Optional[int] = None
     log_prob_micro_batch_size_per_gpu: int = 1
     log_prob_use_dynamic_bsz: bool = False
     log_prob_max_token_len_per_gpu: int = 0
@@ -119,6 +121,7 @@ class Rollout:
     max_num_batched_tokens: int = 8192
     max_model_len: Optional[int] = None
     max_num_seqs: int = 1024
+    log_prob_micro_batch_size: Optional[int] = None
     log_prob_micro_batch_size_per_gpu: int = 1
     log_prob_use_dynamic_bsz: bool = False
     log_prob_max_token_len_per_gpu: int = 0
@@ -155,6 +158,7 @@ class Critic:
     optim: Optim = field(default_factory=Optim)
     model: CriticModel = field(default_factory=CriticModel)
     ppo_mini_batch_size: int = 0
+    ppo_micro_batch_size: Optional[int] = None
     ppo_micro_batch_size_per_gpu: int = 1
     forward_micro_batch_size: Optional[int] = None
     forward_micro_batch_size_per_gpu: Optional[int] = None
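
Note on the unittest.yaml hunks: bash predefines UID (so a bare "export UID" works), but it never sets GID, so the previous "export GID" exported an empty string; in a stock bash shell, echo "${GID:-unset}" prints "unset". Assigning $(id -g) supplies the runner's numeric group ID instead. Because each run: block in a GitHub Actions job starts a fresh shell, the pair of exports is repeated in every step that invokes docker compose, presumably so the compose file can interpolate ${UID} and ${GID} consistently across all three steps (an assumption about the compose setup, which this patch does not show).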
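
Note on the verl_config.py hunks: each dataclass regains an aggregate *_micro_batch_size field next to the *_per_gpu field that is actually consumed. A plausible reason, sketched below under the assumption that these dataclasses back OmegaConf structured configs (as verl-style stacks commonly do): merging a user YAML that still carries the legacy aggregate key into a schema without that field fails with a key error, whereas an Optional[int] = None field absorbs it. The abbreviated Actor class and the omegaconf calls here are illustrative, not lifted from the repo.

    # Minimal sketch: why an Optional legacy key keeps old configs loadable.
    # Assumes OmegaConf structured configs; Actor is abbreviated from the patch.
    from dataclasses import dataclass
    from typing import Optional

    from omegaconf import OmegaConf

    @dataclass
    class Actor:
        ppo_mini_batch_size: int = 256
        ppo_micro_batch_size: Optional[int] = None  # legacy aggregate knob, tolerated
        ppo_micro_batch_size_per_gpu: int = 1  # the value the trainer actually uses

    schema = OmegaConf.structured(Actor)
    legacy_cfg = OmegaConf.create({"ppo_micro_batch_size": 64})
    # Without the Optional field this merge raises ConfigKeyError on the
    # unknown key; with it, the legacy value is carried through untouched.
    merged = OmegaConf.merge(schema, legacy_cfg)
    assert merged.ppo_micro_batch_size == 64

If the project instead normalizes the aggregate knob into the per-GPU one at load time, the None default still serves as the "not provided" sentinel.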