Skip to content

Commit 3091442

Browse files
authored
Merge pull request #76 from OpenManus/murphy/dev-0813
OpenManus Rollout
2 parents 3312239 + ce57499 commit 3091442

File tree

102 files changed

+4889
-4
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+4889
-4
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,8 @@ wandb/
3131
logs/
3232
verl_checkpoints
3333
verl_checkpoints/
34-
verl.egg-info/
34+
verl.egg-info/
35+
36+
test_memory.md
37+
38+
trajectories/traj_*.json

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[submodule "verl"]
22
path = verl
3-
url = git@github.com:realtmxi/verl.git
3+
url = https://github.com/realtmxi/verl.git
44
branch = main

openmanus_rl/environments/prompts/alfworld.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,54 @@
1818
Now it's your turn to take an action.
1919
You should first reason step-by-step about the current situation. This reasoning process MUST be enclosed within <think> </think> tags.
2020
Once you've finished your reasoning, you should choose an admissible action for current step and present it within <action> </action> tags.
21+
"""
22+
23+
ALFWORLD_OPENMANUS_TEMPLATE = """
24+
You are an expert agent operating in the ALFRED Embodied Environment. Your task is to: {task_description}
25+
Prior to this step, you have already taken {step_count} step(s). Below are the most recent {history_length} observations and the corresponding actions you took: {action_history}
26+
You are now at step {current_step} and your current observation is: {current_observation}
27+
Your admissible actions of the current situation are: [{admissible_actions}].
28+
29+
Now it's your turn to take an action. Please output your response using the following separated XML tags:
30+
31+
First, analyze the current situation and plan:
32+
<think>
33+
Analyze the current situation and devise a plan to accomplish the task: {task_description}
34+
What are the key steps needed to complete this task?
35+
Based on the current observation, what should be our immediate next step?
36+
How does this action advance our plan toward completing the task?
37+
</think>
38+
39+
Then, if this is not the first step (step_count > 0), reflect on the last action:
40+
<reflection>
41+
Last observation analysis: Have we made progress toward solving the task?
42+
What did the last action accomplish? Was it successful or did it encounter any issues?
43+
Are we closer to completing the task?
44+
</reflection>
45+
46+
Next, analyze your memory and past experiences:
47+
48+
<memory_analysis>
49+
RAG-style retrieval from history:
50+
51+
[Thinking history - cite specific past reasoning from previous steps]
52+
Example: "At step 3, I reasoned that we needed to find a knife first before attempting to slice..."
53+
Example: "In step 5's thinking, I identified that the fridge typically contains perishable items..."
54+
55+
[Observation/Action history - cite specific observations and outcomes]
56+
Example: "Step 2 observation: 'You are in the kitchen. You see a countertop 1, a cabinet 1...' - this revealed the kitchen layout"
57+
Example: "Step 4 action 'go to fridge 1' succeeded and revealed tomato, lettuce..."
58+
Example: "Step 6 failed with 'Nothing happens' when trying to take knife from drawer 2"
59+
60+
[Milestone tracking]
61+
- Completed: Found target object at step X, Successfully picked up item at step Y
62+
- Current state: Holding [items], Located at [location]
63+
</memory_analysis>
64+
65+
Finally, present your chosen action:
66+
67+
<action>
68+
action_choice: [selected admissible action from the list]
69+
action_parameters: {{relevant details about the action if applicable}}
70+
</action>
2171
"""

openmanus_rl/memory/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
from .memory import SimpleMemory
1+
from .memory import SimpleMemory
2+
from .file_memory import FileMemory
3+
4+
__all__ = ['SimpleMemory', 'FileMemory']

openmanus_rl/memory/file_memory.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Extended memory system with file persistence (memory.md).
3+
Builds on SimpleMemory to add query and storage capabilities.
4+
"""
5+
6+
from typing import List, Dict, Any, Tuple, Optional
7+
from .memory import SimpleMemory
8+
9+
10+
class FileMemory(SimpleMemory):
    """
    Extended memory that adds file persistence and query capabilities.
    Inherits from SimpleMemory for compatibility, adds memory.md support.

    Entries are appended to ``memory_file`` as single lines of the form
    ``[E:<episode>|S:<step>] <content>`` (or ``[S:<step>] <content>`` when no
    episode is given), each preceded by a blank separator line.  The most
    recent entries are mirrored in ``file_cache`` so queries do not have to
    re-read the file.

    NOTE: the on-disk format is line oriented.  Content that itself contains
    newlines is kept as one cache entry for the current session, but will be
    split into several cache lines when the file is reloaded.
    """

    # Maximum number of entries mirrored in ``file_cache``.
    _CACHE_LIMIT = 100
    # Explicit encoding so the file reads back identically on every platform.
    _ENCODING = "utf-8"
    # Message returned by ``query`` when nothing matches.
    _NOT_FOUND = "No relevant memory found"

    def __init__(self, memory_file: str = "memory.md"):
        """
        Args:
            memory_file: Path of the file used for persistence.
        """
        super().__init__()
        self.memory_file = memory_file
        self.file_cache: List[str] = []  # Recent entries from file
        self._load_file_cache()

    def _load_file_cache(self, limit: int = _CACHE_LIMIT):
        """
        Load the most recent entries from the memory file into the cache.

        Blank lines are skipped: ``store_to_file`` writes one before every
        entry, and counting them would halve the number of real entries kept.

        Args:
            limit: Maximum number of entries to keep; values <= 0 leave the
                cache empty.
        """
        self.file_cache = []
        if limit <= 0:
            return
        try:
            with open(self.memory_file, 'r', encoding=self._ENCODING) as f:
                entries = [line for line in f if line.strip()]
        except FileNotFoundError:
            return  # File doesn't exist yet
        self.file_cache = entries[-limit:]

    def store_to_file(self, content: str, episode: str = "", step: int = 0):
        """
        Append content to the memory file and mirror it in the cache.

        Args:
            content: Text to store
            episode: Episode identifier (omitted from the metadata if empty)
            step: Step number
        """
        metadata = f"E:{episode}|S:{step}" if episode else f"S:{step}"
        entry = f"[{metadata}] {content}\n"

        with open(self.memory_file, 'a', encoding=self._ENCODING) as f:
            # Leading newline keeps a blank separator between entries.
            f.write(f"\n{entry}")

        # Update cache, dropping the oldest entries beyond the cap.
        self.file_cache.append(entry)
        del self.file_cache[:-self._CACHE_LIMIT]

    def _iter_matches(self, needle: str):
        """
        Yield matching memories, newest first: file cache, then in-memory data.

        Args:
            needle: Lower-cased substring to look for.
        """
        # File cache first (more persistent memories).
        for line in reversed(self.file_cache):
            if needle in line.lower():
                yield line.strip()

        # In-memory data: assumes ``_data`` (from SimpleMemory) is a list of
        # per-environment lists of dict records -- TODO confirm against
        # SimpleMemory.
        for env_data in reversed(self._data or []):
            for record in reversed(env_data):
                if any(isinstance(value, str) and needle in value.lower()
                       for value in record.values()):
                    yield str(record)

    def query(self, query: str, limit: int = 3) -> str:
        """
        Query memory for relevant information.
        Searches the file cache first, then the in-memory data, using a
        case-insensitive substring match.  Identical results are reported
        only once (``store_staged`` broadcasts the same record to every
        environment, which would otherwise fill the result with copies).

        Args:
            query: Search query
            limit: Maximum number of results; values <= 0 yield no results

        Returns:
            Matching memories joined by newlines, or
            "No relevant memory found" when nothing matches.
        """
        if limit <= 0:
            return self._NOT_FOUND

        results: List[str] = []
        seen = set()
        for match in self._iter_matches(query.lower()):
            if match in seen:
                continue
            seen.add(match)
            results.append(match)
            if len(results) >= limit:
                break

        return "\n".join(results) if results else self._NOT_FOUND

    def store_staged(self, staged_data: Dict[str, Any], episode: str = "", step: int = 0):
        """
        Store data from staged processing.

        The 'plan', 'memory_store' and 'reflection' values (when non-empty)
        are written to the memory file; plan, action and reflection are also
        stored through the inherited ``store`` for every environment.

        Args:
            staged_data: Dictionary containing plan, action, reflection, etc.
            episode: Episode identifier
            step: Step number
        """
        # Store important parts to file
        if staged_data.get('plan'):
            self.store_to_file(f"[Plan] {staged_data['plan']}", episode, step)

        if staged_data.get('memory_store'):
            self.store_to_file(staged_data['memory_store'], episode, step)

        if staged_data.get('reflection'):
            self.store_to_file(f"[Reflection] {staged_data['reflection']}", episode, step)

        # Also store in regular memory structure for compatibility.
        # Assumes ``_data`` stays None until the base memory is initialised
        # and that ``batch_size`` / ``store`` come from SimpleMemory -- TODO
        # confirm against SimpleMemory.
        if self._data is not None:
            record = {
                'text_obs': staged_data.get('plan', ''),
                'action': staged_data.get('action', ''),
                'reflection': staged_data.get('reflection', '')
            }
            # Store for all environments (broadcast)
            broadcast_record = {k: [v] * self.batch_size for k, v in record.items()}
            self.store(broadcast_record)

    def clear_file(self):
        """Truncate the memory file and empty the cache."""
        # Opening in 'w' mode truncates; the context manager closes the
        # handle even if an error occurs.
        with open(self.memory_file, 'w', encoding=self._ENCODING):
            pass
        self.file_cache = []

    def get_recent_from_file(self, n: int = 10) -> List[str]:
        """
        Get the n most recent entries from the file cache.

        Args:
            n: Number of entries wanted; values <= 0 return an empty list
                (a plain ``[-0:]`` slice would return the whole cache).

        Returns:
            A new list with up to n entries, oldest first.
        """
        if n <= 0:
            return []
        return self.file_cache[-n:]

openmanus_rl/memory/rag_memory.py

Whitespace-only changes.
Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,19 @@
1+
"""
2+
Multi-turn rollout module.
3+
Modular stage processing with memory.md integration.
4+
"""
5+
6+
from .openmanus_rollout import OpenmanusRollout
7+
from .modular_stages import ModularStageProcessor, DEFAULT_TOOLS
18
from .rollout_loop import TrajectoryCollector
2-
from .utils import adjust_batch
9+
from .tool_integration import GLOBAL_TOOL_REGISTRY, ToolRegistry, create_simple_tool_wrappers
10+
11+
__all__ = [
12+
'OpenmanusRollout', # VERL-compatible rollout with modular stages
13+
'ModularStageProcessor', # Standalone modular processor
14+
'DEFAULT_TOOLS', # Simple tool functions
15+
'TrajectoryCollector', # Legacy, kept for compatibility
16+
'GLOBAL_TOOL_REGISTRY', # Global tool registry instance
17+
'ToolRegistry', # Tool registry class
18+
'create_simple_tool_wrappers' # Helper for tool wrappers
19+
]

0 commit comments

Comments
 (0)