diff --git a/main.py b/main.py
index 6022251c..7b7548ac 100644
--- a/main.py
+++ b/main.py
@@ -287,21 +287,21 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--base-url",
         type=str,
-        default=os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1"),
+        default=os.getenv("PHONE_AGENT_BASE_URL", "https://open.bigmodel.cn/api/paas/v4"),
         help="Model API base URL",
     )
 
     parser.add_argument(
         "--model",
         type=str,
-        default=os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b"),
+        default=os.getenv("PHONE_AGENT_MODEL", "autoglm-phone"),
         help="Model name",
     )
 
     parser.add_argument(
         "--apikey",
         type=str,
         default=os.getenv("PHONE_AGENT_API_KEY", "EMPTY"),
         help="API key for model authentication",
     )
 
@@ -368,6 +368,25 @@ def parse_args() -> argparse.Namespace:
         help="Language for system prompt (cn or en, default: cn)",
     )
 
+    # History options
+    parser.add_argument(
+        "--history-id",
+        type=str,
+        help="History ID to reuse for this task",
+    )
+
+    parser.add_argument(
+        "--list-history",
+        action="store_true",
+        help="List all saved history records",
+    )
+
+    parser.add_argument(
+        "--clear-history",
+        action="store_true",
+        help="Clear all saved history records",
+    )
+
     parser.add_argument(
         "task",
         nargs="?",
@@ -491,6 +510,29 @@ def main():
         agent_config=agent_config,
     )
 
+    # Handle history commands (these need agent to be created)
+    if args.list_history:
+        print("Saved history records:")
+        print("-" * 80)
+        history_records = agent.list_history()
+        if not history_records:
+            print(" No history records found.")
+        else:
+            for number, record in enumerate(history_records, start=1):
+                print(f" {number}. ID: {record.id}")
+                print(f" Task: {record.task[:60]}{'...' if len(record.task) > 60 else ''}")
+                print(f" Result: {record.result}")
+                print(f" Steps: {record.metadata.get('step_count', 0)}")
+                print(f" Success: {record.metadata.get('success', False)}")
+                print(f" Timestamp: {record.metadata.get('timestamp', 0)}")
+                print()
+        return
+
+    if args.clear_history:
+        agent.clear_history()
+        print("All history records cleared.")
+        return
+
     # Print header
     print("=" * 50)
     print("Phone Agent - AI-powered phone automation")
@@ -512,7 +554,7 @@ def main():
     # Run with provided task or enter interactive mode
     if args.task:
         print(f"\nTask: {args.task}\n")
-        result = agent.run(args.task)
+        result = agent.run(args.task, history_id=args.history_id)
         print(f"\nResult: {result}")
     else:
         # Interactive mode
@@ -532,7 +574,7 @@ def main():
                 print()
                 result = agent.run(task)
                 print(f"\nResult: {result}\n")
-                agent.reset()
+                # Don't reset agent in interactive mode to preserve history
 
             except KeyboardInterrupt:
                 print("\n\nInterrupted. Goodbye!")
diff --git a/phone_agent/agent.py b/phone_agent/agent.py
index b1703161..cd457a69 100644
--- a/phone_agent/agent.py
+++ b/phone_agent/agent.py
@@ -3,12 +3,19 @@
 import json
 import traceback
 from dataclasses import dataclass
 from typing import Any, Callable
 
 from phone_agent.actions import ActionHandler
 from phone_agent.actions.handler import do, finish, parse_action
 from phone_agent.adb import get_current_app, get_screenshot
 from phone_agent.config import get_messages, get_system_prompt
+from phone_agent.history import (
+    ContextReuseStrategy,
+    HistoryConfig,
+    HistoryItem,
+    HistoryManager,
+    strategy_registry,
+)
 from phone_agent.model import ModelClient, ModelConfig
 from phone_agent.model.client import MessageBuilder
 
@@ -65,11 +72,13 @@ def __init__(
         self,
         model_config: ModelConfig | None = None,
         agent_config: AgentConfig | None = None,
         confirmation_callback: Callable[[str], bool] | None = None,
         takeover_callback: Callable[[str], None] | None = None,
+        history_config: HistoryConfig | None = None,
     ):
         self.model_config = model_config or ModelConfig()
         self.agent_config = agent_config or AgentConfig()
+        self.history_config = history_config or HistoryConfig()
 
         self.model_client = ModelClient(self.model_config)
         self.action_handler = ActionHandler(
@@ -77,36 +86,108 @@ def __init__(
             confirmation_callback=confirmation_callback,
             takeover_callback=takeover_callback,
         )
+        self.history_manager = HistoryManager(self.history_config)
 
         self._context: list[dict[str, Any]] = []
         self._step_count = 0
 
-    def run(self, task: str) -> str:
+    def run(
+        self,
+        task: str,
+        history_id: str | None = None,
+        reuse_strategy: ContextReuseStrategy | None = None,
+    ) -> str:
         """
         Run the agent to complete a task.
 
         Args:
             task: Natural language description of the task.
+            history_id: ID of a saved history record to reuse. When given, it
+                takes precedence over trigger-word auto-reuse.
+            reuse_strategy: Strategy that builds the starting context from the
+                record. Defaults to the registered "full" strategy.
 
         Returns:
             Final message from the agent.
         """
-        self._context = []
+        self._context = self._build_initial_context(task, history_id, reuse_strategy)
         self._step_count = 0
 
         # First step with user prompt
         result = self._execute_step(task, is_first=True)
 
         if result.finished:
-            return result.message or "Task completed"
+            final_result = result.message or "Task completed"
+            return self._save_history(task, final_result, success=True)
 
         # Continue until finished or max steps reached
         while self._step_count < self.agent_config.max_steps:
             result = self._execute_step(is_first=False)
 
             if result.finished:
-                return result.message or "Task completed"
+                final_result = result.message or "Task completed"
+                return self._save_history(task, final_result, success=True)
 
-        return "Max steps reached"
+        # The step budget ran out: record the run as unsuccessful.
+        return self._save_history(task, "Max steps reached", success=False)
+
+    def _build_initial_context(
+        self,
+        task: str,
+        history_id: str | None,
+        reuse_strategy: ContextReuseStrategy | None,
+    ) -> list[dict[str, Any]]:
+        """
+        Build the starting context for a run, reusing saved history on request.
+
+        Args:
+            task: Natural language description of the task.
+            history_id: Explicit history record to reuse, if any.
+            reuse_strategy: Strategy that builds the context; defaults to the
+                registered "full" strategy.
+
+        Returns:
+            The context to start from; empty when no history is reused.
+        """
+        if history_id:
+            # An explicit ID always wins over trigger-word detection.
+            target_history = self.history_manager.get(history_id)
+            if target_history is None:
+                print(f"Warning: history ID {history_id!r} not found; starting fresh.")
+        elif self.history_manager.should_reuse(task):
+            # A trigger word was found: reuse the most recent record.
+            target_history = self.history_manager.get()
+        else:
+            target_history = None
+
+        if target_history is None:
+            return []
+
+        strategy = reuse_strategy or strategy_registry.get("full")
+        if strategy is None:
+            return []
+        return strategy.build_context(task, target_history)
+
+    def _save_history(self, task: str, result: str, success: bool) -> str:
+        """
+        Record the finished run (when auto-save is enabled) and return ``result``.
+
+        Args:
+            task: Natural language description of the task.
+            result: Final message of the run.
+            success: Whether the agent reported the task as finished.
+
+        Returns:
+            ``result`` unchanged, so callers can return this call directly.
+        """
+        if self.history_config.enable_auto_save:
+            self.history_manager.save(
+                task=task,
+                context=self._context,
+                result=result,
+                # The timestamp is added by HistoryManager.save().
+                metadata={"step_count": self._step_count, "success": success},
+            )
+        return result
 
     def step(self, task: str | None = None) -> StepResult:
@@ -250,3 +331,34 @@ def context(self) -> list[dict[str, Any]]:
     def step_count(self) -> int:
         """Get the current step count."""
         return self._step_count
+
+    def get_history(self, history_id: str | None = None, index: int = 0) -> HistoryItem | None:
+        """
+        Get a specific history record.
+
+        Args:
+            history_id: Optional history ID to retrieve.
+            index: Position used when no ID is given (0 for most recent).
+
+        Returns:
+            HistoryItem or None if not found.
+        """
+        return self.history_manager.get(history_id, index)
+
+    def list_history(self, limit: int | None = None) -> list[HistoryItem]:
+        """
+        List saved history records, newest first.
+
+        Args:
+            limit: Optional limit on the number of records to return.
+
+        Returns:
+            List of HistoryItem objects.
+        """
+        return self.history_manager.list(limit)
+
+    def clear_history(self) -> None:
+        """
+        Clear all saved history records.
+        """
+        self.history_manager.clear()
diff --git a/phone_agent/history/__init__.py b/phone_agent/history/__init__.py
new file mode 100644
index 00000000..5087a368
--- /dev/null
+++ b/phone_agent/history/__init__.py
@@ -0,0 +1,21 @@
+"""History management module for Phone Agent."""
+
+from phone_agent.history.manager import HistoryItem, HistoryManager, HistoryConfig
+from phone_agent.history.strategy import (
+    ContextReuseStrategy,
+    FullReuseStrategy,
+    TaskBasedReuseStrategy,
+    CustomReuseStrategy,
+    strategy_registry,
+)
+
+__all__ = [
+    "HistoryItem",
+    "HistoryManager",
+    "HistoryConfig",
+    "ContextReuseStrategy",
+    "FullReuseStrategy",
+    "TaskBasedReuseStrategy",
+    "CustomReuseStrategy",
+    "strategy_registry",
+]
diff --git a/phone_agent/history/manager.py b/phone_agent/history/manager.py
new file mode 100644
index 00000000..4a26fb18
--- /dev/null
+++ b/phone_agent/history/manager.py
@@ -0,0 +1,190 @@
+"""Core history management implementation."""
+
+import copy
+import json
+import time
+import uuid
+from dataclasses import asdict, dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class HistoryItem:
+    """A single saved task run."""
+
+    id: str  # Unique identifier
+    task: str  # Original task description
+    context: List[Dict[str, Any]]  # Full conversation context
+    result: str  # Task result
+    metadata: Dict[str, Any]  # Extra metadata (timestamp, step count, ...)
+
+
+@dataclass
+class HistoryConfig:
+    """Configuration for history management."""
+
+    max_history: int = 10  # Maximum number of records kept
+    enable_auto_save: bool = True  # Save history automatically
+    enable_auto_reuse: bool = True  # Detect and reuse history automatically
+    enable_persistence: bool = True  # Persist history to disk
+    persistence_file: str = "phone_agent_history.json"  # Persistence file path
+    reuse_triggers: List[str] = field(default_factory=lambda: [  # Auto-reuse trigger words
+        "老样子", "同样", "上次", "老地方", "再来", "再次", "same as before", "repeat", "again"
+    ])
+
+
+class HistoryManager:
+    """In-memory task history (newest first) with optional JSON persistence."""
+
+    def __init__(self, config: Optional[HistoryConfig] = None):
+        """
+        Create the manager and load persisted records when enabled.
+
+        Args:
+            config: History settings; defaults to ``HistoryConfig()``.
+        """
+        self.config = config or HistoryConfig()
+        self._history: List[HistoryItem] = []
+
+        if self.config.enable_persistence:
+            self._load_history_from_file()
+
+    def save(
+        self,
+        task: str,
+        context: List[Dict[str, Any]],
+        result: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> HistoryItem:
+        """
+        Store a finished run as the newest history record.
+
+        Args:
+            task: Original task description.
+            context: Conversation context of the run; it is deep-copied.
+            result: Final result message.
+            metadata: Extra data; a "timestamp" is added when it has none.
+
+        Returns:
+            The stored HistoryItem.
+        """
+        record_metadata = dict(metadata) if metadata else {}
+        # Always stamp the record so listings can show when it was saved.
+        record_metadata.setdefault("timestamp", time.time())
+
+        history_item = HistoryItem(
+            id=str(uuid.uuid4())[:8],
+            task=task,
+            # Deep copy so later edits to the caller's messages cannot alter the record.
+            context=copy.deepcopy(context),
+            result=result,
+            metadata=record_metadata,
+        )
+        # Newest first; drop whatever exceeds the configured limit.
+        self._history.insert(0, history_item)
+        del self._history[self.config.max_history:]
+
+        if self.config.enable_persistence:
+            self._save_history_to_file()
+
+        return history_item
+
+    def get(self, history_id: Optional[str] = None, index: int = 0) -> Optional[HistoryItem]:
+        """
+        Look up a record by ID, or by position when no ID is given.
+
+        Args:
+            history_id: ID of the record; when set, ``index`` is ignored.
+            index: Position in the newest-first list (0 is the most recent).
+
+        Returns:
+            The matching HistoryItem, or None when nothing matches.
+        """
+        if history_id:
+            return next((item for item in self._history if item.id == history_id), None)
+        # Out-of-range positions (including negative ones) yield None, not IndexError.
+        if -len(self._history) <= index < len(self._history):
+            return self._history[index]
+        return None
+
+    def list(self, limit: Optional[int] = None) -> List[HistoryItem]:
+        """
+        Return saved records, newest first.
+
+        Args:
+            limit: Maximum number of records to return; None returns all.
+
+        Returns:
+            A new list of HistoryItem objects.
+        """
+        return self._history[:limit]
+
+    def delete(self, history_id: Optional[str] = None, index: int = 0) -> bool:
+        """
+        Delete one record by ID, or by position when no ID is given.
+
+        Args:
+            history_id: ID of the record; when set, ``index`` is ignored.
+            index: Position in the newest-first list (0 is the most recent).
+
+        Returns:
+            True if a record was removed, False otherwise.
+        """
+        target = self.get(history_id, index)
+        if target is None:
+            return False
+
+        # Remove exactly that object (dataclass == would compare field values).
+        self._history = [item for item in self._history if item is not target]
+        if self.config.enable_persistence:
+            self._save_history_to_file()
+        return True
+
+    def clear(self) -> None:
+        """Remove every record, in memory and (if enabled) in the JSON file."""
+        self._history.clear()
+        if self.config.enable_persistence:
+            self._save_history_to_file()
+
+    def should_reuse(self, task: str) -> bool:
+        """
+        Decide whether a task asks for the previous run to be reused.
+
+        Args:
+            task: Natural language task description.
+
+        Returns:
+            True when auto-reuse is enabled and the task contains any trigger
+            word (case-insensitive substring match).
+        """
+        if not self.config.enable_auto_reuse:
+            return False
+        task_lower = task.lower()
+        return any(trigger.lower() in task_lower for trigger in self.config.reuse_triggers)
+
+    def _load_history_from_file(self) -> None:
+        """Load records from the JSON file; missing or empty files mean no history."""
+        try:
+            with open(self.config.persistence_file, "r", encoding="utf-8") as f:
+                raw = f.read()
+            # A blank file (e.g. a freshly created placeholder) is not an error.
+            history_data = json.loads(raw) if raw.strip() else []
+            self._history = [HistoryItem(**item) for item in history_data]
+        except FileNotFoundError:
+            return  # Nothing saved yet
+        except (OSError, ValueError, TypeError) as e:
+            # ValueError covers malformed JSON; TypeError covers unexpected record shapes.
+            print(f"Warning: Failed to load history from {self.config.persistence_file}: {e}")
+            self._history = []
+
+    def _save_history_to_file(self) -> None:
+        """Write all records to the JSON file; failures only print a warning."""
+        try:
+            # Serialize first so a serialization error cannot truncate the file.
+            payload = json.dumps(
+                [asdict(item) for item in self._history], ensure_ascii=False, indent=2
+            )
+            with open(self.config.persistence_file, "w", encoding="utf-8") as f:
+                f.write(payload)
+        except (OSError, TypeError, ValueError) as e:
+            print(f"Warning: Failed to save history to {self.config.persistence_file}: {e}")
diff --git a/phone_agent/history/strategy.py b/phone_agent/history/strategy.py
new file mode 100644
index 00000000..9fce7c8f
--- /dev/null
+++ b/phone_agent/history/strategy.py
@@ -0,0 +1,106 @@
+"""Context reuse strategies for history management."""
+
+import copy
+import re
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, List, Optional
+
+from phone_agent.history.manager import HistoryItem
+
+
+class ContextReuseStrategy(ABC):
+    """Interface for building a run's starting context from a saved record."""
+
+    @abstractmethod
+    def build_context(self, current_task: str, history_item: HistoryItem) -> List[Dict[str, Any]]:
+        """
+        Build the starting context for ``current_task`` from ``history_item``.
+
+        Args:
+            current_task: Task that is about to run.
+            history_item: Saved record to draw context from.
+
+        Returns:
+            Messages to seed the new run with; may be empty.
+        """
+
+
+class FullReuseStrategy(ContextReuseStrategy):
+    """Reuse the complete saved context."""
+
+    def build_context(self, current_task: str, history_item: HistoryItem) -> List[Dict[str, Any]]:
+        """Return a copy of the saved context without a trailing user message."""
+        # Deep copy so the new run cannot modify the stored record's messages.
+        context = copy.deepcopy(history_item.context)
+        # Drop a trailing user message to avoid duplicating the user turn.
+        if context and context[-1].get("role") == "user":
+            context.pop()
+        return context
+
+
+class TaskBasedReuseStrategy(ContextReuseStrategy):
+    """Reuse the saved context only when it looks related to the current task."""
+
+    def build_context(self, current_task: str, history_item: HistoryItem) -> List[Dict[str, Any]]:
+        """Return the saved context if the tasks look similar, else an empty list."""
+        # Simplified relevance check; intent matching could replace it later.
+        if self._is_similar_task(current_task, history_item.task):
+            return self._extract_relevant_context(history_item.context, current_task)
+        return []
+
+    def _is_similar_task(self, task1: str, task2: str) -> bool:
+        """Return True when the two tasks share at least one word token."""
+        keywords1 = set(re.findall(r"\w+", task1.lower()))
+        keywords2 = set(re.findall(r"\w+", task2.lower()))
+        return not keywords1.isdisjoint(keywords2)
+
+    def _extract_relevant_context(
+        self, context: List[Dict[str, Any]], current_task: str
+    ) -> List[Dict[str, Any]]:
+        """Return the part of ``context`` to reuse; currently a copy of all of it."""
+        # Deep copy so the new run cannot modify the stored record's messages.
+        return copy.deepcopy(context)
+
+
+class CustomReuseStrategy(ContextReuseStrategy):
+    """Delegate context building to a user-supplied callback."""
+
+    def __init__(self, custom_builder: Callable[[str, HistoryItem], List[Dict[str, Any]]]):
+        """
+        Store the callback used to build contexts.
+
+        Args:
+            custom_builder: Called with (current_task, history_item); returns the context.
+        """
+        self.custom_builder = custom_builder
+
+    def build_context(self, current_task: str, history_item: HistoryItem) -> List[Dict[str, Any]]:
+        """Return whatever the callback builds for this task and record."""
+        return self.custom_builder(current_task, history_item)
+
+
+class ReuseStrategyRegistry:
+    """Name-to-strategy lookup table."""
+
+    def __init__(self):
+        """Create an empty registry."""
+        self._strategies: Dict[str, ContextReuseStrategy] = {}
+
+    def register(self, name: str, strategy: ContextReuseStrategy) -> None:
+        """Register ``strategy`` under ``name``, replacing any previous entry."""
+        self._strategies[name] = strategy
+
+    def get(self, name: str) -> Optional[ContextReuseStrategy]:
+        """Return the strategy registered under ``name``, or None if unknown."""
+        return self._strategies.get(name)
+
+    def list(self) -> List[str]:
+        """Return the names of all registered strategies."""
+        return list(self._strategies)
+
+
+# Global strategy registry
+strategy_registry = ReuseStrategyRegistry()
+# Register the built-in strategies
+strategy_registry.register("full", FullReuseStrategy())
+strategy_registry.register("task_based", TaskBasedReuseStrategy())
diff --git a/phone_agent_history.json b/phone_agent_history.json
new file mode 100644
index 00000000..e69de29b