|
7 | 7 | from datetime import datetime |
8 | 8 | from typing import List, Dict, Any, Optional, Tuple |
9 | 9 | from dataclasses import dataclass, asdict |
10 | | - |
| 10 | +from dotenv import load_dotenv |
11 | 11 | import requests |
12 | 12 |
|
13 | 13 | # Configure project imports |
14 | 14 | PROJECT_ROOT = Path(__file__).resolve().parent.parent |
15 | 15 | sys.path.insert(0, str(PROJECT_ROOT)) |
16 | 16 |
|
| 17 | +load_dotenv(PROJECT_ROOT / '.env') |
| 18 | + |
17 | 19 | from openmanus_rl.multi_turn_rollout.openmanus_rollout import OpenmanusRollout |
18 | 20 | from openmanus_rl.environments.env_manager import make_envs |
19 | 21 | from openmanus_rl.environments.prompts.alfworld import ALFWORLD_OPENMANUS_INITIAL_TEMPLATE |
@@ -144,11 +146,11 @@ def act(self, observation: str, admissible_actions: List[str]) -> Tuple[str, str |
144 | 146 | # Add user message to chat history |
145 | 147 | self.chat_history.append({"role": "user", "content": user_message}) |
146 | 148 |
|
147 | | - # Get response from LLM or fallback |
148 | | - if self.api_enabled: |
149 | | - response = self._query_llm_chat() |
150 | | - else: |
151 | | - response = self._heuristic_action(admissible_actions) |
| 149 | + # Get response from LLM only, no fallback |
| 150 | + if not self.api_enabled: |
| 151 | + raise RuntimeError("API not configured. Please set OPENAI_API_KEY and OPENAI_API_BASE") |
| 152 | + |
| 153 | + response = self._query_llm_chat() |
152 | 154 |
|
153 | 155 | # Add assistant response to chat history |
154 | 156 | self.chat_history.append({"role": "assistant", "content": response}) |
@@ -213,36 +215,12 @@ def _query_llm_chat(self) -> str: |
213 | 215 | return content |
214 | 216 | else: |
215 | 217 | logger.error(f"API error {response.status_code}: {response.text[:200]}") |
216 | | - return self._heuristic_action([]) |
| 218 | + raise RuntimeError(f"API request failed with status {response.status_code}: {response.text[:200]}") |
217 | 219 |
|
218 | 220 | except Exception as e: |
219 | 221 | logger.error(f"API exception: {e}") |
220 | | - return self._heuristic_action([]) |
| 222 | + raise RuntimeError(f"API request failed: {e}") |
221 | 223 |
|
222 | | - def _heuristic_action(self, available_actions: List[str]) -> str: |
223 | | - """Simple heuristic for action selection when API unavailable.""" |
224 | | - # Basic exploration strategy |
225 | | - action_sequence = ["look", "inventory", "go to kitchen", "go to cabinet 1", |
226 | | - "open cabinet 1", "take mug 1", "go to sinkbasin 1", |
227 | | - "clean mug 1", "go to coffeemachine 1", "put mug 1"] |
228 | | - |
229 | | - # Use chat history length to determine step |
230 | | - step_num = (len(self.chat_history) - 1) // 2 # Subtract system message, divide by 2 for user/assistant pairs |
231 | | - idx = step_num % len(action_sequence) |
232 | | - action = action_sequence[idx] |
233 | | - |
234 | | - # Check if action is valid |
235 | | - if available_actions and action not in str(available_actions): |
236 | | - # Try to find a similar valid action |
237 | | - for act in available_actions: |
238 | | - if any(keyword in act.lower() for keyword in ['go', 'take', 'put', 'open']): |
239 | | - action = act |
240 | | - break |
241 | | - |
242 | | - if self.is_first_turn: |
243 | | - return f"<think>\nExploring environment systematically for task: {self.current_task}\n</think>\n\n<action>\naction_choice: {action}\n</action>" |
244 | | - else: |
245 | | - return f"<memory_recall>\nRecalling previous exploration attempts.\n</memory_recall>\n\n<reflection>\nContinuing systematic exploration.\n</reflection>\n\n<think>\nNext logical step in exploration.\n</think>\n\n<action>\naction_choice: {action}\n</action>" |
246 | 224 |
|
247 | 225 | def _extract_action(self, response: str) -> str: |
248 | 226 | """Extract action from structured response.""" |
|
0 commit comments