Skip to content

Commit 0f2618b

Browse files
committed
Move files from agentkit_deploy directory to parent directory
1 parent f0301d1 commit 0f2618b

File tree

11 files changed

+635
-0
lines changed

11 files changed

+635
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# AgentKit configuration
2+
agentkit.yaml
3+
agentkit*.yaml
4+
5+
# Python cache
6+
__pycache__/
7+
*.py[cod]
8+
*$py.class
9+
10+
# Virtual environments
11+
.venv/
12+
venv/
13+
ENV/
14+
env/
15+
16+
# IDE
17+
.vscode/
18+
.idea/
19+
.windsurf/
20+
21+
# Git
22+
.git/
23+
.gitignore
24+
25+
# Docker
26+
Dockerfile*
27+
.dockerignore
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import os
2+
import json
3+
import logging
4+
from pathlib import Path
5+
6+
from dotenv import load_dotenv
7+
# 加载 settings.txt(dotenv 格式)
8+
load_dotenv(dotenv_path=str(Path(__file__).resolve().parent / "settings.txt"), override=False)
9+
10+
# Import get_ark_token and set MODEL_AGENT_API_KEY environment variable
11+
from veadk.auth.veauth.ark_veauth import get_ark_token
12+
# When MODEL_AGENT_API_KEY is missing or empty, populate it with the value
# returned by get_ark_token() so later code can rely on it being set.
if not os.environ.get("MODEL_AGENT_API_KEY"):
    os.environ["MODEL_AGENT_API_KEY"] = get_ark_token()

# Module-level alias for the resolved key.
MODEL_AGENT_API_KEY = os.environ["MODEL_AGENT_API_KEY"]
17+
18+
from veadk import Agent, Runner
19+
from veadk.a2a.agent_card import get_agent_card
20+
from google.adk.a2a.executor.a2a_agent_executor import A2aAgentExecutor
21+
from agentkit.apps import AgentkitA2aApp
22+
23+
import sys
24+
sys.path.append(str(Path(__file__).resolve().parent))
25+
from tools.catalog_discovery import catalog_discovery
26+
from tools.duckdb_sql_execution import duckdb_sql_execution
27+
from tools.lancedb_hybrid_execution import lancedb_hybrid_execution
28+
from prompts import SYSTEM_PROMPT
29+
from veadk.memory.short_term_memory import ShortTermMemory
30+
from veadk.tools.builtin_tools.video_generate import video_generate
31+
from agentkit.apps import AgentkitAgentServerApp
32+
33+
# Short-term memory instance created with the "local" backend.
short_term_memory = ShortTermMemory(backend="local")

# Configure logging: INFO level, "<time> - <level> - <message>" layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger.
logger = logging.getLogger(__name__)

# Tool callables handed to the agent.
tools = [
    catalog_discovery,
    duckdb_sql_execution,
    lancedb_hybrid_execution,
    video_generate,
]
45+
46+
# 定义带记忆的 Agent 类
47+
class DataAnalysisAgent(Agent):
    """Agent that appends a session's stored messages to its instruction.

    Each call to :meth:`run` reads the messages stored for ``session_id`` from
    ``self.memory_manager``, appends them to ``self.instruction``, delegates to
    ``Agent.run`` and then stores the new user/assistant exchange.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``Agent.__init__``."""
        super().__init__(**kwargs)

    def run(self, input_text, session_id="default", **kwargs):
        """Answer ``input_text`` with the session's earlier messages as context.

        Args:
            input_text: The current user message.
            session_id: Key under which messages are read from and written to
                ``self.memory_manager``.
            **kwargs: Passed through unchanged to ``Agent.run``.

        Returns:
            Whatever ``Agent.run`` returns for ``input_text``.
        """
        # Assumes get_messages yields (role, content) pairs, as the unpacking
        # below requires.
        past_messages = self.memory_manager.get_messages(session_id=session_id)

        # Build the instruction plus one "role: content" line per stored message.
        prompt_with_history = self.instruction
        for speaker, text in past_messages:
            prompt_with_history = prompt_with_history + f"\n{speaker}: {text}"

        # NOTE(review): self.instruction is overwritten here and never restored,
        # so the appended history stays on the agent for every later call
        # (including calls for other session ids) and is appended again on top
        # of itself next time. Confirm whether this accumulation is intended.
        self.instruction = prompt_with_history

        reply = super().run(input_text, **kwargs)

        # Persist the exchange: the user message first, then the reply.
        for speaker, text in (("user", input_text), ("assistant", reply)):
            self.memory_manager.add_message(session_id=session_id, role=speaker, content=text)

        return reply
65+
66+
# 创建带记忆的 Agent
67+
# Model id is read from MODEL_AGENT_NAME; a Doubao model is the default.
model_name = os.environ.get("MODEL_AGENT_NAME", "doubao-seed-1-6-251015")

# The agent instance: system prompt, model, tools and short-term memory.
root_agent = DataAnalysisAgent(
    description=(
        "基于LanceDB的数据检索Agent,支持结构化和向量查询。典型问题包括:1.你有哪些数据?2.给我一些样例数据?3.Ang Lee 评分超过7分的有哪些电影?4.Ang Lee 评分超过7分的电影中,有哪个电影海报中含有动物?5.Life of Pi 的电影海报,变成视频"
    ),
    instruction=SYSTEM_PROMPT,
    model_name=model_name,
    tools=tools,
    short_term_memory=short_term_memory,
)

runner = Runner(agent=root_agent)
77+
78+
# a2a_app = AgentkitA2aApp()
79+
80+
# @a2a_app.agent_executor(runner=runner)
81+
# class MyAgentExecutor(A2aAgentExecutor):
82+
# pass
83+
84+
# # 当直接运行此文件时,启动本地服务
85+
# if __name__ == "__main__":
86+
# logger.info("🚀 正在启动 A2A Agent 服务...")
87+
# a2a_app.run(
88+
# agent_card=get_agent_card(agent=root_agent, url="http://127.0.0.1:8000"),
89+
# host="0.0.0.0",
90+
# port=8000,
91+
# )
92+
93+
# Server app wrapping the agent together with its short-term memory.
agent_server_app = AgentkitAgentServerApp(
    agent=root_agent,
    short_term_memory=short_term_memory,
)

# Start the server only when this file is executed directly.
if __name__ == "__main__":
    agent_server_app.run(host="0.0.0.0", port=8000)
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import os
2+
3+
SYSTEM_PROMPT = '''
4+
```你是一个火山引擎上基于 LanceDB + DuckDB + Doubao Vision 构建的数据检索专家,擅长依据用户自然语言问题,从 IMDB 数据集精准检索电影信息,以及进行多模态内容生成。
5+
你的核心任务是根据用户自然语言问题,从 IMDB 数据集检索电影信息,或进行多模态内容生成。
6+
7+
### 核心工作流 (ReAct Pattern)
8+
请严格按 "Thought (思考) -> Action (行动) -> Observation (观察) -> Final Answer (最终回答)" 模式执行。
9+
10+
1. **Discovery (探索)**:
11+
- 任务开始时,先调用 `[catalog_discovery]` 确认表名和可用字段。
12+
13+
2. **Query (查询)**:
14+
- 根据下方的 **"决策罗盘"** 选择 `[duckdb_sql_execution]` 或 `[lancedb_hybrid_execution]`。
15+
16+
3. **Result Handling (结果处理)**:
17+
- **结果为空**:严禁仅通过修改引号或大小写重试,直接回答用户“未找到”。
18+
- **结果正常**:立即停止调用,回答用户。
19+
20+
---
21+
22+
### 🧠 决策罗盘:我该用哪个工具? (关键)
23+
24+
在决定使用 DuckDB 还是 LanceDB 之前,请先判断用户的 **意图类型**:
25+
26+
| 用户意图特征 | 典型场景 | **必须使用的工具** |
27+
| :--- | :--- | :--- |
28+
| **已知实体/精确查找** | "查找《Life of Pi》的海报"、"《教父》的导演是谁" | **[duckdb_sql_execution]** |
29+
| **统计/排序/聚合** | "评分最高的 10 部电影"、"统计 Nolan 的电影数量" | **[duckdb_sql_execution]** |
30+
| **结构化属性过滤** | "2010 年之后的动作片"、"时长超过 2 小时的电影" | **[duckdb_sql_execution]** |
31+
| **视觉内容描述** | "海报里有一只老虎"、"画面黑暗且压抑的电影海报" | **[lancedb_hybrid_execution]** |
32+
| **模糊语义搜索** | "关于绝望与救赎的电影"、"类似《盗梦空间》剧情的电影" | **[lancedb_hybrid_execution]** |
33+
| **混合检索** | "Nolan 导演的(SQL)海报里有火(Visual)的电影" | **[lancedb_hybrid_execution]** (配合 filters) |
34+
35+
---
36+
37+
### 🔧 工具调用规范
38+
39+
#### 1. [duckdb_sql_execution] (结构化/精确检索)
40+
- **定义**:执行标准 SQL 语句,用于处理数值、文本精确匹配、排序和统计。
41+
- **何时使用**:
42+
1. 当用户明确提到电影名称时,需获取该电影的属性(海报、评分等),此时严禁使用 LanceDB,因为 SQL 才是最精准的。
43+
2. 涉及 `COUNT`, `AVG`, `ORDER BY`, `GROUP BY` 等逻辑操作。
44+
- **语法警告**:
45+
- `released_year` 是 **String** 类型,比较时必须加单引号!
46+
- ✅ `WHERE released_year > '2000'`
47+
- ❌ `WHERE released_year > 2000`
48+
49+
#### 2. [lancedb_hybrid_execution] (语义/视觉检索)
50+
- **定义**:执行向量相似度搜索(文本到图像/文本到文本)。
51+
- **何时使用**:
52+
1. 当用户描述画面的**视觉特征**时。
53+
2. 当用户描述**抽象概念**或**剧情氛围**,且无法通过简单的 Genre 字段过滤时。
54+
- **Filters 语法**:
55+
- 仅接受 SQL `WHERE` 子句格式。
56+
- 字符串必须用单引号。
57+
- `released_year` 必须用单引号。
58+
- 示例:`"director = 'Ang Lee' AND released_year > '2000'"`
59+
60+
#### 3. [video_generate] (视频生成)
61+
- **定义**:基于 Prompt 或图片生成视频。
62+
- **前置逻辑**:此工具通常作为 **最后一步**。
63+
- **路径 A (已知电影名)**:先用 `duckdb_sql_execution` 查出 `poster_precision_link` -> 再调用 `video_generate`。
64+
- **路径 B (未知电影/视觉描述)**:先用 `lancedb_hybrid_execution` 搜出符合描述的电影及海报 -> 再调用 `video_generate`。
65+
66+
---
67+
68+
### 📝 Few-Shot Examples (思维链示例)
69+
70+
#### Q1: 找出评分最高的动作片 (结构化统计)
71+
**User:** "找出评分最高的动作片"
72+
**Thought:** 此为结构化查询,涉及类型过滤和排序,应使用 SQL 处理。
73+
**Action:** `duckdb_sql_execution("SELECT series_title, imdb_rating FROM imdb_top_1000 WHERE genre LIKE '%Action%' ORDER BY imdb_rating DESC LIMIT 5")`
74+
75+
#### Q2: 统计 2015 年以后 Nolan 导演的电影数量 (统计聚合)
76+
**User:** "统计 2015 年以后 Nolan 导演的电影数量。"
77+
**Thought:** 这是统计聚合查询,且涉及年份过滤。注意 released_year 是字符串,必须加单引号。
78+
**Action:** `duckdb_sql_execution("SELECT count(*) FROM imdb_top_1000 WHERE director LIKE '%Nolan%' AND released_year > '2015'")`
79+
80+
#### Q3: Ang Lee 评分超过 7 分的电影中,有哪个电影海报中含有动物? (混合检索)
81+
**User:** "Ang Lee 评分超过 7 分的电影中,有哪个电影海报中含有动物?"
82+
**Thought:** 用户查询包含对电影海报内容的视觉描述(“含有动物”),需进行语义/视觉搜索,同时包含元数据过滤(导演和评分)。
83+
**Action:** `lancedb_hybrid_execution({"query_text": "poster with animals", "filters": "director LIKE '%Ang Lee%' AND imdb_rating > 7.0", "select": ["series_title", "poster_precision_link"], "limit": 10})`
84+
85+
#### Q4: 把《Life of Pi》的电影海报,变成视频 (已知实体 -> 视频)
86+
**User:** "把《Life of Pi》的电影海报,变成视频"
87+
**Thought:**
88+
1. 用户明确指定电影名 "Life of Pi"。
89+
2. 这是**精确查找**任务,直接用 SQL 获取其海报链接。
90+
3. 获取链接后,调用视频生成工具。
91+
**Action:** `duckdb_sql_execution("SELECT series_title, poster_precision_link FROM imdb_top_1000 WHERE series_title = 'Life of Pi'")`
92+
**Observation:** `[{"series_title": "Life of Pi", "poster_precision_link": "https://.../pi.jpg"}]`
93+
**Thought:** 已获取海报链接,现在生成视频。
94+
**Action:** `video_generate(params=[{"video_name": "life_of_pi_video.mp4", "first_frame": "https://.../pi.jpg", "prompt": "大海上的孤舟,奇幻漂流风格,海浪轻轻波动。 --rs 1080p"}], batch_size=1)`
95+
96+
#### Q5: 帮我找一张海报里有红色跑车的电影,并把它做成视频 (视觉描述 -> 视频)
97+
**User:** "帮我找一张海报里有红色跑车的电影,并把它做成视频。"
98+
**Thought:**
99+
1. 用户未指定电影名,而是描述画面内容(“红色跑车”)。
100+
2. 这是**视觉检索**任务,必须使用 LanceDB 查找符合描述的海报。
101+
3. 获取检索结果后,调用视频生成工具。
102+
**Action:** `lancedb_hybrid_execution({"query_text": "poster with a red sports car", "select": ["series_title", "poster_precision_link"], "limit": 1})`
103+
**Observation:** `[{"series_title": "Ford v Ferrari", "poster_precision_link": "https://.../fvf.jpg"}]`
104+
**Thought:** 已找到符合描述的电影《Ford v Ferrari》,现在生成视频。
105+
**Action:** `video_generate(params=[{"video_name": "car_movie.mp4", "first_frame": "https://.../fvf.jpg", "prompt": "红色跑车在赛道上飞驰,引擎轰鸣,速度感。"}], batch_size=1)`
106+
107+
# 输出格式
108+
- 按照 "Thought (思考) -> Action (行动) -> Observation (观察) -> Final Answer (最终回答)" 模式呈现结果。
109+
- 语言表达专业、清晰,对每个步骤的描述准确明了。
110+
- 若使用工具,需明确写出工具名称及具体参数。
111+
```
112+
'''
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
veadk-python
2+
veadk-python[extensions]
3+
google-adk
4+
python-dotenv
5+
lancedb
6+
agentkit-sdk-python
7+
volcengine-python-sdk[ark]
8+
pyarrow
9+
duckdb
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Ark (OpenAI compatible)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Package marker for tools modules
2+
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import os
2+
import json
3+
4+
from rich.console import Console
5+
6+
# Import the LanceDBManager singleton
7+
from .lancedb_manager import lancedb_manager
8+
# Import utility functions
9+
from .utils import get_text_embedding as get_embedding
10+
11+
console = Console()
12+
13+
def catalog_discovery(query_intent: str) -> str:
    """Search the metadata table by vector similarity to ``query_intent``.

    Args:
        query_intent: Keywords describing what the user is looking for.

    Returns:
        A JSON string. On success::

            {"status": "ok", "records": [...], "meta": {"row_count": N},
             "echo": {"query_intent": ...}}

        with at most 10 records and the ``vector`` field removed from each.
        On any failure::

            {"status": "error", "error": "<message>"}
    """
    # markup=False: the line embeds caller-supplied text, and Rich would
    # otherwise parse bracketed fragments (including the "[catalog_discovery]"
    # prefix) as markup tags; an unmatched "[/...]" raises MarkupError.
    console.print(
        f"[catalog_discovery] Inputs: query_intent={query_intent!r}",
        markup=False,
    )

    if not query_intent:
        return json.dumps(
            {
                "status": "error",
                "error": "Query intent is empty. Please provide a keyword to search.",
            },
            ensure_ascii=False,
        )

    tbl, error_msg = lancedb_manager.get_metadata_table()
    if error_msg:
        return json.dumps({"status": "error", "error": error_msg}, ensure_ascii=False)

    try:
        # Embed the query text.
        query_vector, emb_err = get_embedding(query_intent)
        if emb_err:
            return json.dumps({"status": "error", "error": emb_err}, ensure_ascii=False)

        # Nearest-neighbour search on the "vector" column, top 10 rows.
        results_df = tbl.search(query_vector, vector_column_name="vector").limit(10).to_pandas()
        records = results_df.to_dict("records")

        # Drop the embedding itself before handing records back to the agent.
        for record in records:
            record.pop("vector", None)

        console.print(f"✅ 检索到 {len(records)} 条相关元数据")
        return json.dumps(
            {
                "status": "ok",
                "records": records,
                "meta": {"row_count": len(records)},
                "echo": {"query_intent": query_intent},
            },
            ensure_ascii=False,
        )
    except Exception as e:
        # Tool boundary: report every failure to the agent as a payload.
        error_msg = f"❌ 检索失败: {e}"
        # Colour via style= rather than inline tags so brackets inside the
        # exception text cannot break markup parsing in this handler.
        console.print(error_msg, style="red", markup=False)
        return json.dumps({"status": "error", "error": error_msg}, ensure_ascii=False)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import os
2+
import json
3+
4+
from rich.console import Console
5+
6+
# Import the LanceDBManager singleton
7+
from .lancedb_manager import lancedb_manager
8+
9+
console = Console()
10+
11+
def _json_default(value):
    """Fallback for result cells that ``json.dumps`` cannot encode natively.

    Tries ``value.tolist()`` (array-like cells), then ``value.isoformat()``
    (date/time-like cells), and finally falls back to ``str(value)``.
    """
    for converter in ("tolist", "isoformat"):
        method = getattr(value, converter, None)
        if callable(method):
            return method()
    return str(value)


def duckdb_sql_execution(sql: str, user_question: str = "") -> str:
    """Execute a SQL statement via DuckDB on the Lance table contents.

    The table returned by ``lancedb_manager.open_table()`` is registered on the
    DuckDB connection under the name ``imdb_top_1000`` and ``sql`` is then
    executed against that connection.

    Args:
        sql: A direct SQL string, e.g. ``"SELECT ..."``.
        user_question: Only written to the console log; it does not influence
            execution.

    Returns:
        A JSON string. On success::

            {"status": "ok", "data": [header, *rows], "records": [...],
             "meta": {"row_count": N, "table": "imdb_top_1000"}}

        On any failure::

            {"status": "error", "error": "<message>"}
    """
    # markup=False: the logged text embeds caller input; Rich would otherwise
    # treat bracketed fragments as markup tags and may raise MarkupError.
    console.print(
        f"[duckdb_sql_execution] Inputs: sql={sql!r}, user_question={user_question!r}",
        markup=False,
    )
    if not sql or not isinstance(sql, str):
        return json.dumps(
            {"status": "error", "error": "SQL 字符串缺失或类型错误"},
            ensure_ascii=False,
        )

    # Open the table using the LanceDBManager.
    tbl, err = lancedb_manager.open_table()
    if err:
        return json.dumps({"status": "error", "error": err}, ensure_ascii=False)

    view_name = "imdb_top_1000"
    conn = lancedb_manager.get_duckdb_connection()

    # NOTE(review): `sql` is executed verbatim. If it can carry untrusted
    # input, consider restricting what this connection may do — confirm with
    # the owner of lancedb_manager.
    try:
        try:
            # Preferred path: register the Arrow table directly.
            conn.register(view_name, tbl.to_arrow())
        except Exception:
            # Best-effort fallback: register a pandas DataFrame instead.
            conn.register(view_name, tbl.to_pandas())
        out_df = conn.execute(sql).fetchdf()
    except Exception as e:
        # Tool boundary: registration and execution failures are both reported
        # to the agent as a payload rather than raised.
        return json.dumps(
            {"status": "error", "error": f"DuckDB 执行失败: {e}"},
            ensure_ascii=False,
        )

    # Two views of the same result: a header + row matrix and a list of dicts.
    header = [str(c) for c in out_df.columns]
    rows = out_df.values.tolist()
    records_obj = out_df.to_dict(orient="records")

    console.print(
        f"[sql] Returned rows: {len(rows)} from table='{view_name}'",
        markup=False,
    )

    result = {
        "status": "ok",
        "data": [header] + rows,
        "records": records_obj,
        "meta": {
            "row_count": len(rows),
            "table": view_name,
        },
    }
    # default=: a cell that json cannot encode natively is converted instead
    # of raising out of the tool.
    return json.dumps(result, ensure_ascii=False, default=_json_default)

0 commit comments

Comments
 (0)