@@ -174,95 +174,6 @@ def _batch_tokenize(self, responses: List[str]) -> torch.Tensor:
             padding="longest"
         )['input_ids']
 
-    def _postprocess_responses(self, responses: torch.Tensor) -> Tuple[torch.Tensor, List[str]]:
-        """
-        Process responses to stop at tool call or final response.
-        Handles tags like <action> and </action> or <response> and </response>.
-
-        Args:
-            responses: Tensor containing response token IDs
-
-        Returns:
-            Tuple of (processed response tensor, processed response strings)
-        """
-        # Decode responses to strings
-        responses_str = self.tokenizer.batch_decode(
-            responses,
-            skip_special_tokens=True
-        )
-
-        # Process each response to extract action/response content
-        processed_responses = []
-        for resp in responses_str:
-            if '</action>' in resp:
-                # Stop at end of action
-                processed = resp.split('</action>')[0] + '</action>'
-            elif '</response>' in resp:
-                # Stop at end of response
-                processed = resp.split('</response>')[0] + '</response>'
-            else:
-                # No recognized end tag, keep as is
-                processed = resp
-            processed_responses.append(processed)
-
-        # Re-tokenize processed responses
-        responses = self._batch_tokenize(processed_responses)
-
-        return responses, processed_responses
-
-    def _process_next_obs(self, next_obs: List[str]) -> torch.Tensor:
-        """
-        Process next observations from environment.
-        Tokenizes observations and handles maximum length constraints.
-
-        Args:
-            next_obs: List of observation strings from the environment
-
-        Returns:
-            Tensor of tokenized observations
-        """
-        # Tokenize observations with consistent padding
-        next_obs_ids = self.tokenizer(
-            next_obs,
-            padding='longest',
-            return_tensors='pt',
-            add_special_tokens=False,  # Prevents adding special tokens
-        )['input_ids']
-
-        # Truncate if observations are too long
-        if next_obs_ids.shape[1] > self.config.max_obs_length:
-            print(f"[WARNING] OBSERVATION TOO LONG, CONSIDER CHANGING YOUR CONFIG, {next_obs_ids.shape[1]} & {self.config.max_obs_length}")
-            # Truncate to max_obs_length
-            next_obs_ids = next_obs_ids[:, :self.config.max_obs_length]
-
-        return next_obs_ids
-
-    def _update_rolling_state(self, rollings: DataProto, cur_responses: torch.Tensor,
-                              next_obs_ids: torch.Tensor) -> DataProto:
-        """Update rolling state with new responses and observations."""
-        # Concatenate and handle padding
-        new_input_ids = self.tensor_fn.concatenate_with_padding([
-            rollings.batch['input_ids'],
-            cur_responses,
-            next_obs_ids
-        ])
-
-        # Create attention mask and position ids
-        new_attention_mask = self.tensor_fn.create_attention_mask(new_input_ids)
-        new_position_ids = self.tensor_fn.create_position_ids(new_attention_mask)
-
-        # Cut to appropriate length
-        effective_len = new_attention_mask.sum(dim=1).max()
-        max_len = min(self.config.max_prompt_length, effective_len)
-
-        new_rollings = DataProto.from_dict({
-            'input_ids': new_input_ids[:, -max_len:],
-            'position_ids': new_position_ids[:, -max_len:],
-            'attention_mask': new_attention_mask[:, -max_len:]
-        })
-        new_rollings.meta_info.update(rollings.meta_info)
-
-        return new_rollings
 
     def _run_single_rollout(self, initial_prompt_ids: torch.Tensor, task_idx: int, client: Any) -> Dict[str, Any]:
         """
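
For reference, a minimal standalone sketch of the stop-tag truncation that the removed `_postprocess_responses` implemented: decode each generation, cut it at the first recognized closing tag, then re-tokenize so the returned tensor matches the truncated strings. The `truncate_at_stop_tag`/`postprocess` names and the `gpt2` tokenizer are illustrative assumptions, not part of this repository.

```python
from typing import List, Tuple

import torch
from transformers import AutoTokenizer

# Closing tags recognized by the rollout loop; checked in this fixed order,
# so '</action>' takes precedence when both appear, mirroring the removed
# method's if/elif logic.
STOP_TAGS = ('</action>', '</response>')


def truncate_at_stop_tag(text: str) -> str:
    """Cut a decoded generation at the first recognized closing tag, keeping the tag."""
    for tag in STOP_TAGS:
        if tag in text:
            return text.split(tag)[0] + tag
    return text  # no recognized end tag: keep the text unchanged


def postprocess(tokenizer, responses: torch.Tensor) -> Tuple[torch.Tensor, List[str]]:
    decoded: List[str] = tokenizer.batch_decode(responses, skip_special_tokens=True)
    processed = [truncate_at_stop_tag(t) for t in decoded]
    # Re-tokenize so the returned tensor matches the truncated strings.
    ids = tokenizer(processed, padding='longest', return_tensors='pt',
                    add_special_tokens=False)['input_ids']
    return ids, processed


if __name__ == '__main__':
    tok = AutoTokenizer.from_pretrained('gpt2')  # stand-in tokenizer for the demo
    tok.pad_token = tok.eos_token  # GPT-2 has no pad token by default
    sample = tok(['<action>search(query)</action> trailing text'],
                 return_tensors='pt')['input_ids']
    _, texts = postprocess(tok, sample)
    print(texts)  # ['<action>search(query)</action>']
```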