from __future__ import annotations

import ray
import gym
import numpy as np

# -----------------------------------------------------------------------------
# Ray remote worker actor -----------------------------------------------------
# -----------------------------------------------------------------------------
@ray.remote(num_cpus=0.2)
class WebshopWorker:
    """Ray remote actor that replaces the worker function.

    Each actor hosts a *WebAgentTextEnv* instance.
    """

    def __init__(self, seed, env_kwargs):
        # Lazy imports keep heavy dependencies out of the driver process and
        # avoid CUDA initialisation issues.
        import sys
        import os
        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), 'webshop'))
        sys.path.append(project_root)
        # Importing web_agent_site registers 'WebAgentTextEnv-v0' with gym.
        from web_agent_site.envs import WebAgentTextEnv  # noqa: WPS433, F401 (runtime import)

        env_kwargs = dict(env_kwargs)  # defensive copy before mutating
        env_kwargs['seed'] = seed
        self.env = gym.make('WebAgentTextEnv-v0', **env_kwargs)

    def step(self, action):
        """Execute one step in the environment."""
        obs, reward, done, info = self.env.step(action)
        info = dict(info or {})  # make a *copy* so we can mutate safely
        info['available_actions'] = self.env.get_available_actions()
        info['task_score'] = reward

        # Redefine the reward as rule-based only: 10.0 for a win, 0.0 otherwise.
        if done and reward == 1.0:
            info['won'] = True
            reward = 10.0
        else:
            info['won'] = False
            reward = 0.0

        return obs, reward, done, info

    def reset(self, idx):
        """Reset the environment with the given session index."""
        obs, info = self.env.reset(session=idx)
        info = dict(info or {})
        info['available_actions'] = self.env.get_available_actions()
        info['won'] = False
        return obs, info

    def render(self, mode):
        """Render the environment."""
        return self.env.render(mode=mode)

    def get_available_actions(self):
        """Get the currently available actions."""
        return self.env.get_available_actions()

    def get_goals(self):
        """Get the environment's goal list."""
        return self.env.server.goals

    def close(self):
        """Close the environment."""
        self.env.close()
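
# A minimal smoke test for a single worker actor -- hypothetical usage, assuming
# Ray is initialised, the webshop package is importable, and actions follow the
# usual WebAgentTextEnv string format ('search[...]' / 'click[...]'):
#
#     worker = WebshopWorker.remote(seed=0, env_kwargs={'observation_mode': 'text'})
#     obs, info = ray.get(worker.reset.remote(0))
#     obs, reward, done, info = ray.get(worker.step.remote('search[red shoes]'))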


# -----------------------------------------------------------------------------
# Vectorised Ray environment --------------------------------------------------
# -----------------------------------------------------------------------------

class WebshopMultiProcessEnv(gym.Env):
    """A vectorised, Ray-based wrapper around *WebAgentTextEnv*.

    ``info`` dictionaries returned by :py:meth:`step` **and** :py:meth:`reset`
    automatically contain the key ``'available_actions'`` so downstream RL code
    can obtain the *legal* action set without extra IPC overhead.
    """

    def __init__(
        self,
        seed: int = 0,
        env_num: int = 1,
        group_n: int = 1,
        is_train: bool = True,
        env_kwargs: dict | None = None,
    ) -> None:
        super().__init__()

        # Initialise Ray if it is not already running.
        if not ray.is_initialized():
            ray.init()

        self.group_n = group_n
        self.env_num = env_num
        self.num_processes = env_num * group_n
        self.is_train = is_train
        if not is_train:
            assert group_n == 1, 'group_n must be 1 at evaluation time'

        self._rng = np.random.RandomState(seed)

        self._env_kwargs = env_kwargs if env_kwargs is not None else {
            'observation_mode': 'text',
            'num_products': None,
        }

        # -------------------------- Ray actors setup --------------------------
        self._workers = []

        for i in range(self.num_processes):
            # Workers in the same group share a seed, so every member of a
            # group hosts an identically seeded environment.
            worker = WebshopWorker.remote(seed + (i // self.group_n), self._env_kwargs)
            self._workers.append(worker)

        # Fetch the goal list from the first worker.
        goals = ray.get(self._workers[0].get_goals.remote())

        # ------- original split logic, kept for reference -------
        # if args.num is None:
        #     if split == 'test':
        #         self.goal_idxs = range(500)
        #     elif split == 'eval':
        #         self.goal_idxs = range(500, 1500)
        #     elif split == 'train':
        #         self.goal_idxs = range(1500, len(self.env.server.goals))
        # else:
        #     self.goal_idxs = range(len(self.env.server.goals))

        # Simplified split: the first 500 goals for evaluation, the rest for training.
        if not self.is_train:
            self.goal_idxs = range(500)
        else:
            self.goal_idxs = range(500, len(goals))

        print(f'goal idxs: {self.goal_idxs}')

    # ------------------------------------------------------------------
    # Base API ----------------------------------------------------------
    # ------------------------------------------------------------------

    def step(self, actions: list[str]):
        if len(actions) != self.num_processes:
            raise ValueError(
                f'Expected {self.num_processes} actions, got {len(actions)}',
            )

        # Dispatch step commands to all workers in parallel.
        futures = [
            worker.step.remote(action)
            for worker, action in zip(self._workers, actions)
        ]

        # Collect results.
        results = ray.get(futures)
        obs_list, reward_list, done_list, info_list = [], [], [], []
        for obs, reward, done, info in results:
            obs_list.append(obs)
            reward_list.append(reward)
            done_list.append(done)
            info_list.append(info)

        return obs_list, reward_list, done_list, info_list

    def reset(self):
        # Sample env_num distinct goals, then repeat each group_n times so every
        # member of a group resets to the same goal.
        idx = self._rng.choice(self.goal_idxs, size=self.env_num, replace=False)
        idx = np.repeat(idx, self.group_n).tolist()

        # Dispatch reset commands to all workers in parallel.
        futures = [
            worker.reset.remote(i)
            for worker, i in zip(self._workers, idx)
        ]

        # Collect results.
        results = ray.get(futures)
        obs_list, info_list = [], []
        for obs, info in results:
            obs_list.append(obs)
            info_list.append(info)

        return obs_list, info_list

    # ------------------------------------------------------------------
    # Convenience helpers ----------------------------------------------
    # ------------------------------------------------------------------

    def render(self, mode: str = 'text', env_idx: int | None = None):
        # Render a single environment when env_idx is given, otherwise all of them.
        if env_idx is not None:
            return ray.get(self._workers[env_idx].render.remote(mode))

        futures = [worker.render.remote(mode) for worker in self._workers]
        return ray.get(futures)

    # ------------------------------------------------------------------
    # Clean-up ----------------------------------------------------------
    # ------------------------------------------------------------------

    def close(self):
        if getattr(self, '_closed', False):
            return

        # Close every environment, then kill the Ray actors.
        close_futures = [worker.close.remote() for worker in self._workers]
        ray.get(close_futures)

        for worker in self._workers:
            ray.kill(worker)

        self._closed = True

    def __del__(self):  # noqa: D401
        # Best-effort clean-up; Ray may already be shut down at interpreter exit.
        try:
            self.close()
        except Exception:
            pass


# -----------------------------------------------------------------------------
# Factory helper --------------------------------------------------------------
# -----------------------------------------------------------------------------

def build_webshop_envs(
    seed: int = 0,
    env_num: int = 1,
    group_n: int = 1,
    is_train: bool = True,
    env_kwargs: dict | None = None,
):
    """Mirror *build_sokoban_envs* so higher-level code can swap seamlessly."""
    return WebshopMultiProcessEnv(
        seed=seed,
        env_num=env_num,
        group_n=group_n,
        is_train=is_train,
        env_kwargs=env_kwargs,
    )
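

# -----------------------------------------------------------------------------
# Example driver --------------------------------------------------------------
# -----------------------------------------------------------------------------

# A minimal sketch of the intended driver loop, not part of the original API.
# It assumes a local WebShop installation with its data files in place and
# uses the usual WebAgentTextEnv action format ('search[...]' / 'click[...]').
if __name__ == '__main__':
    envs = build_webshop_envs(seed=0, env_num=2, group_n=1, is_train=False)
    obs_list, info_list = envs.reset()

    # A real agent would pick actions from info['available_actions']; a
    # hard-coded search query is enough for a smoke test.
    actions = ['search[red shoes]'] * envs.num_processes
    obs_list, rewards, dones, infos = envs.step(actions)
    print(rewards, dones)

    envs.close()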