Skip to content

Commit 52480e1

Browse files
committed
Remove E402 noqa comments and restore pyproject.toml configuration in data_analysis_with_datalake directory
1 parent 8482c2e commit 52480e1

File tree

9 files changed

+254
-148
lines changed

9 files changed

+254
-148
lines changed
Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,37 @@
1+
# 导入所有必要的模块
12
import os
2-
import json
3+
import sys
34
import logging
45
from pathlib import Path
5-
66
from dotenv import load_dotenv
7+
8+
9+
# 将当前目录添加到sys.path以便本地模块导入
10+
sys.path.append(str(Path(__file__).resolve().parent))
711
# 加载 settings.txt(dotenv 格式)
8-
load_dotenv(dotenv_path=str(Path(__file__).resolve().parent / "settings.txt"), override=False)
12+
load_dotenv(
13+
dotenv_path=str(Path(__file__).resolve().parent / "settings.txt"), override=False
14+
)
15+
16+
# 导入veadk和agentkit相关模块
17+
from veadk import Agent, Runner # noqa: E402
18+
from veadk.auth.veauth.ark_veauth import get_ark_token # noqa: E402
19+
from veadk.memory.short_term_memory import ShortTermMemory # noqa: E402
20+
from veadk.tools.builtin_tools.video_generate import video_generate # noqa: E402
21+
from agentkit.apps import AgentkitAgentServerApp # noqa: E402
22+
23+
# 导入本地模块
24+
from tools.catalog_discovery import catalog_discovery # noqa: E402
25+
from tools.duckdb_sql_execution import duckdb_sql_execution # noqa: E402
26+
from tools.lancedb_hybrid_execution import lancedb_hybrid_execution # noqa: E402
27+
from prompts import SYSTEM_PROMPT # noqa: E402
928

10-
# Import get_ark_token and set MODEL_AGENT_API_KEY environment variable
11-
from veadk.auth.veauth.ark_veauth import get_ark_token
1229
# Check if MODEL_AGENT_API_KEY environment variable exists and is not empty
1330
if "MODEL_AGENT_API_KEY" not in os.environ or not os.environ["MODEL_AGENT_API_KEY"]:
1431
os.environ["MODEL_AGENT_API_KEY"] = get_ark_token()
1532
# Optionally assign to a variable for easier use in the file
1633
MODEL_AGENT_API_KEY = os.environ["MODEL_AGENT_API_KEY"]
1734

18-
from veadk import Agent, Runner
19-
from veadk.a2a.agent_card import get_agent_card
20-
from google.adk.a2a.executor.a2a_agent_executor import A2aAgentExecutor
21-
from agentkit.apps import AgentkitA2aApp
22-
23-
import sys
24-
sys.path.append(str(Path(__file__).resolve().parent))
25-
from tools.catalog_discovery import catalog_discovery
26-
from tools.duckdb_sql_execution import duckdb_sql_execution
27-
from tools.lancedb_hybrid_execution import lancedb_hybrid_execution
28-
from prompts import SYSTEM_PROMPT
29-
from veadk.memory.short_term_memory import ShortTermMemory
30-
from veadk.tools.builtin_tools.video_generate import video_generate
31-
from agentkit.apps import AgentkitAgentServerApp
32-
3335
short_term_memory = ShortTermMemory(backend="local")
3436

3537
# 设置日志
@@ -41,10 +43,17 @@
4143
# --- Logging Configuration ---
4244
logger = logging.getLogger(__name__)
4345

44-
tools = [catalog_discovery, duckdb_sql_execution, lancedb_hybrid_execution, video_generate]
46+
tools = [
47+
catalog_discovery,
48+
duckdb_sql_execution,
49+
lancedb_hybrid_execution,
50+
video_generate,
51+
]
4552

4653
# 创建带记忆的 Agent
47-
model_name = os.getenv("MODEL_AGENT_NAME", "doubao-seed-1-6-251015") # 默认使用更主流的豆包模型
54+
model_name = os.getenv(
55+
"MODEL_AGENT_NAME", "doubao-seed-1-6-251015"
56+
) # 默认使用更主流的豆包模型
4857
root_agent = Agent(
4958
description="基于LanceDB的数据检索Agent,支持结构化和向量查询。典型问题包括:1.你有哪些数据?2.给我一些样例数据?3.Ang Lee 评分超过7分的有哪些电影?4.Ang Lee 评分超过7分的电影中,有哪个电影海报中含有动物?5.Life of Pi 的电影海报,变成视频",
5059
instruction=SYSTEM_PROMPT,
@@ -71,8 +80,9 @@
7180
# )
7281

7382
agent_server_app = AgentkitAgentServerApp(
74-
agent=root_agent, short_term_memory=short_term_memory,
83+
agent=root_agent,
84+
short_term_memory=short_term_memory,
7585
)
7686

7787
if __name__ == "__main__":
78-
agent_server_app.run(host="0.0.0.0", port=8000)
88+
agent_server_app.run(host="0.0.0.0", port=8000)
Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import requests
22
import httpx
33
import random
4-
import json
54

65
from google.adk.cli.adk_web_server import CreateSessionRequest, RunAgentRequest
76
from google.genai.types import Content, Part
@@ -12,36 +11,36 @@
1211
# Step 0: setup running configs
1312
app_name = "data_analysis_with_code"
1413
user_id = "agentkit_user"
15-
session_id = f"agentkit_sample_session"
14+
session_id = "agentkit_sample_session"
1615
base_url = ""
1716
api_key = ""
18-
19-
20-
21-
task_num = 1
22-
17+
18+
task_num = 1
2319

2420
# Step 1: create a session
2521
def create_session():
2622
create_session_request = CreateSessionRequest(
27-
session_id = session_id + f"_{random.randint(1, 9999)}",
23+
session_id=session_id + f"_{random.randint(1, 9999)}",
2824
)
2925

3026
response = requests.post(
3127
url=f"{base_url}/apps/{app_name}/users/{user_id}/sessions/{create_session_request.session_id}",
3228
headers={"Authorization": f"Bearer {api_key}"},
3329
)
34-
30+
3531
print(f"[create session] Response from server: {response.json()}")
36-
32+
3733
return create_session_request.session_id
3834

3935
# Step 2: run agent with SSE
4036
run_agent_request = RunAgentRequest(
4137
app_name=app_name,
4238
user_id=user_id,
4339
session_id=create_session(),
44-
new_message=Content(parts=[Part(text="Ang Lee的电影评分超过7分,有哪些电影海报包含动物")], role="user"),
40+
new_message=Content(
41+
parts=[Part(text="Ang Lee的电影评分超过7分,有哪些电影海报包含动物")],
42+
role="user",
43+
),
4544
stream=True,
4645
)
4746

@@ -50,18 +49,28 @@ def create_session():
5049
# 3. Handle streaming events
5150
async def send_request(message: str):
5251
run_agent_request = RunAgentRequest(
53-
app_name=app_name,
54-
user_id=user_id,
55-
session_id=create_session(),
56-
new_message=Content(parts=[Part(text=message)], role="user"),
57-
stream=True,
52+
app_name=app_name,
53+
user_id=user_id,
54+
session_id=create_session(),
55+
new_message=Content(parts=[Part(text=message)], role="user"),
56+
stream=True,
5857
)
5958

60-
with httpx.stream("POST", f"{base_url}/run_sse", json=run_agent_request.model_dump(exclude_none=True), timeout=120, headers={"Authorization": f"Bearer {api_key}"}) as r:
59+
with httpx.stream(
60+
"POST",
61+
f"{base_url}/run_sse",
62+
json=run_agent_request.model_dump(exclude_none=True),
63+
timeout=120,
64+
headers={"Authorization": f"Bearer {api_key}"},
65+
) as r:
6166
for line in r.iter_lines():
6267
print(line)
63-
68+
6469
async def send_request_parallel():
65-
tasks = [send_request("Ang Lee的电影评分超过7分,有哪些电影海报包含动物") for _ in range(task_num)]
70+
tasks = [
71+
send_request("Ang Lee的电影评分超过7分,有哪些电影海报包含动物")
72+
for _ in range(task_num)
73+
]
6674
await asyncio.gather(*tasks)
67-
asyncio.run(send_request_parallel())
75+
76+
asyncio.run(send_request_parallel())

02-use-cases/data_analysis_with_datalake/prompts.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
import os
2-
3-
SYSTEM_PROMPT = '''
1+
SYSTEM_PROMPT = """
42
```你是一个火山引擎上基于 LanceDB + DuckDB + Doubao Vision 构建的数据检索专家,擅长依据用户自然语言问题,从 IMDB 数据集精准检索电影信息,以及进行多模态内容生成。
53
你的核心任务是根据用户自然语言问题,从 IMDB 数据集检索电影信息,或进行多模态内容生成。
64
@@ -111,4 +109,4 @@
111109
- 语言表达专业、清晰,对每个步骤的描述准确明了。
112110
- 若使用工具,需明确写出工具名称及具体参数。
113111
```
114-
'''
112+
"""

02-use-cases/data_analysis_with_datalake/tools/catalog_discovery.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,27 @@
1-
import os
21
import json
32

43
from rich.console import Console
54

65
# Import the LanceDBManager singleton
76
from .lancedb_manager import lancedb_manager
7+
88
# Import utility functions
99
from .utils import get_text_embedding as get_embedding
1010

1111
console = Console()
1212

13+
1314
def catalog_discovery(query_intent: str) -> str:
1415
"""Search metadata using vector similarity based on the user's intent keywords."""
1516
console.print(f"[catalog_discovery] Inputs: query_intent={query_intent!r}")
1617

1718
if not query_intent:
18-
return json.dumps({
19-
"status": "error",
20-
"error": "Query intent is empty. Please provide a keyword to search."
21-
})
19+
return json.dumps(
20+
{
21+
"status": "error",
22+
"error": "Query intent is empty. Please provide a keyword to search.",
23+
}
24+
)
2225

2326
tbl, error_msg = lancedb_manager.get_metadata_table()
2427
if error_msg:
@@ -31,20 +34,24 @@ def catalog_discovery(query_intent: str) -> str:
3134
return json.dumps({"error": emb_err})
3235

3336
# 调用Lance进行检索
34-
results_df = tbl.search(query_vector, vector_column_name="vector").limit(10).to_pandas()
37+
results_df = (
38+
tbl.search(query_vector, vector_column_name="vector").limit(10).to_pandas()
39+
)
3540
records = results_df.to_dict("records")
3641

3742
# Remove the vector column from the records before returning to the agent
3843
for record in records:
3944
record.pop("vector", None)
4045

4146
console.print(f"✅ 检索到 {len(records)} 条相关元数据")
42-
return json.dumps({
43-
"status": "ok",
44-
"records": records,
45-
"meta": {"row_count": len(records)},
46-
"echo": {"query_intent": query_intent}
47-
})
47+
return json.dumps(
48+
{
49+
"status": "ok",
50+
"records": records,
51+
"meta": {"row_count": len(records)},
52+
"echo": {"query_intent": query_intent},
53+
}
54+
)
4855
except Exception as e:
4956
error_msg = f"❌ 检索失败: {e}"
5057
console.print(f"[red]{error_msg}[/red]")

02-use-cases/data_analysis_with_datalake/tools/duckdb_sql_execution.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import os
21
import json
32

43
from rich.console import Console
@@ -8,21 +7,24 @@
87

98
console = Console()
109

10+
1111
def duckdb_sql_execution(sql: str, user_question: str = "") -> str:
1212
"""Execute structured SQL via DuckDB on Lance table contents.
1313
1414
Expect sql to be a direct SQL string:
1515
"SELECT ..."
1616
"""
17-
console.print(f"[duckdb_sql_execution] Inputs: sql={sql!r}, user_question={user_question!r}")
17+
console.print(
18+
f"[duckdb_sql_execution] Inputs: sql={sql!r}, user_question={user_question!r}"
19+
)
1820
if not sql or not isinstance(sql, str):
1921
return json.dumps({"error": "SQL 字符串缺失或类型错误"}, ensure_ascii=False)
2022

2123
# Open the table using the LanceDBManager
2224
tbl, err = lancedb_manager.open_table()
2325
if err:
2426
return json.dumps({"error": err}, ensure_ascii=False)
25-
27+
2628
view_name = "imdb_top_1000"
2729

2830
# Register Arrow/Pandas to DuckDB
@@ -56,6 +58,6 @@ def duckdb_sql_execution(sql: str, user_question: str = "") -> str:
5658
"meta": {
5759
"row_count": len(records),
5860
"table": view_name,
59-
}
61+
},
6062
}
61-
return json.dumps(result, ensure_ascii=False)
63+
return json.dumps(result, ensure_ascii=False)

02-use-cases/data_analysis_with_datalake/tools/lancedb_hybrid_execution.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
1-
import os
21
import json
32
from typing import Optional
43

54
from rich.console import Console
65
import pandas as pd
76

8-
console = Console()
9-
107
# Import the LanceDBManager singleton
118
from .lancedb_manager import lancedb_manager
9+
1210
# Import utility functions
1311
from .utils import get_multimodal_text_vector as _get_text_vector
1412

13+
console = Console()
14+
1515

16-
def lancedb_hybrid_execution(query_text: str, filters: str = "", select: Optional[list] = None, limit: int = 10) -> str:
17-
console.print(f"[lancedb_hybrid_execution] Inputs: query_text={query_text!r}, filters={filters!r}")
16+
def lancedb_hybrid_execution(
17+
query_text: str, filters: str = "", select: Optional[list] = None, limit: int = 10
18+
) -> str:
19+
console.print(
20+
f"[lancedb_hybrid_execution] Inputs: query_text={query_text!r}, filters={filters!r}"
21+
)
1822

1923
# open table
2024
tbl, err = lancedb_manager.open_table()
@@ -50,13 +54,19 @@ def lancedb_hybrid_execution(query_text: str, filters: str = "", select: Optiona
5054
df_norm.columns = header
5155
records_obj = df_norm.to_dict(orient="records")
5256
except Exception:
53-
records_obj = [{header[i]: row[i] for i in range(len(header))} for row in df.values.tolist()]
57+
records_obj = [
58+
{header[i]: row[i] for i in range(len(header))}
59+
for row in df.values.tolist()
60+
]
5461
records = df.values.tolist()
55-
return json.dumps({
56-
"status": "ok",
57-
"data": [header] + records,
58-
"records": records_obj,
59-
"meta": {"row_count": len(records)}
60-
}, ensure_ascii=False)
62+
return json.dumps(
63+
{
64+
"status": "ok",
65+
"data": [header] + records,
66+
"records": records_obj,
67+
"meta": {"row_count": len(records)},
68+
},
69+
ensure_ascii=False,
70+
)
6171
except Exception as e:
62-
return json.dumps({"error": f"混合检索失败: {e}"}, ensure_ascii=False)
72+
return json.dumps({"error": f"混合检索失败: {e}"}, ensure_ascii=False)

0 commit comments

Comments
 (0)