Skip to content

Commit 810f71e

Browse files
committed
feat: finish bocha search
1 parent 42c1ed7 commit 810f71e

File tree

4 files changed

+44
-29
lines changed

4 files changed

+44
-29
lines changed

examples/basic_modules/textual_memory_internet_search_example.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
{
2727
"backend": "bocha",
2828
"config": {
29-
"api_key": "sk-xxxx", # 🔑 Your BochaAI API Key
30-
"search_engine_id": "", # Not required for BochaAI, but field exists for API consistency
29+
"api_key": "sk-xxx", # Your BochaAI API Key
3130
"max_results": 5,
3231
"reader": { # Reader config for chunking web content
33-
"backend": "simple",
34-
"config": {},
32+
"backend": "simple_struct",
33+
"config": { # your simple struct reader config
34+
},
3535
},
3636
},
3737
}
@@ -43,21 +43,10 @@
4343
# ========= 4. Run BochaAI Web Search =========
4444
print("=== Scenario 1: Web Search (BochaAI) ===")
4545
query_web = "Alibaba 2024 ESG report"
46-
results_web = retriever.retrieve_from_web(query_web)
46+
results_web = retriever.retrieve_from_internet(query_web)
4747

4848
print(f"Retrieved {len(results_web)} memory items.")
4949
for idx, item in enumerate(results_web, 1):
50-
print(f"[{idx}] {item.memory[:100]}...") # preview first 100 chars
51-
52-
print("==" * 20)
53-
54-
# ========= 5. Run BochaAI AI Search =========
55-
print("=== Scenario 2: AI Search (BochaAI) ===")
56-
query_ai = "Weather in Beijing"
57-
results_ai = retriever.retrieve_from_ai(query_ai)
58-
59-
print(f"Retrieved {len(results_ai)} memory items.")
60-
for idx, item in enumerate(results_ai, 1):
61-
print(f"[{idx}] {item.memory[:100]}...")
50+
print(f"[{idx}] {item.memory[:500]}...") # preview first 500 chars
6251

6352
print("==" * 20)

src/memos/configs/internet_retriever.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,17 @@ class XinyuSearchConfig(BaseInternetRetrieverConfig):
5555
)
5656

5757

58-
class BochaSearchConfig(XinyuSearchConfig):
58+
class BochaSearchConfig(BaseInternetRetrieverConfig):
5959
"""Configuration class for Bocha Search API."""
6060

61+
max_results: int = Field(default=20, description="Maximum number of results to retrieve")
62+
num_per_request: int = Field(default=10, description="Number of results per API request")
63+
reader: MemReaderConfigFactory = Field(
64+
...,
65+
default_factory=MemReaderConfigFactory,
66+
description="Reader configuration",
67+
)
68+
6169

6270
class InternetRetrieverConfigFactory(BaseConfig):
6371
"""Factory class for creating internet retriever configurations."""

src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,15 @@ def search_ai(
8181
return self._post(self.ai_url, body)
8282

8383
def _post(self, url: str, body: dict) -> list[dict]:
84-
"""Helper method to send POST request and return JSON results."""
84+
"""Send POST request and parse BochaAI search results."""
8585
try:
8686
resp = requests.post(url, headers=self.headers, json=body)
8787
resp.raise_for_status()
88-
data = resp.json()
89-
return data.get("results", [])
88+
raw_data = resp.json()
89+
90+
# BochaAI nests the result list under data -> webPages -> value
91+
return raw_data.get("data", {}).get("webPages", {}).get("value", [])
92+
9093
except Exception:
9194
import traceback
9295

@@ -100,7 +103,6 @@ class BochaAISearchRetriever:
100103
def __init__(
101104
self,
102105
access_key: str,
103-
search_engine_id: str,
104106
embedder: OllamaEmbedder,
105107
reader: BaseMemReader,
106108
max_results: int = 20,
@@ -110,7 +112,6 @@ def __init__(
110112
111113
Args:
112114
access_key: BochaAI API key
113-
search_engine_id: (Not used for Bocha, but kept for API consistency)
114115
embedder: Embedder instance for generating embeddings
115116
reader: MemReader instance for processing internet content
116117
max_results: Maximum number of search results to retrieve
@@ -178,12 +179,22 @@ def _convert_to_mem_items(
178179
def _process_result(
179180
self, result: dict, query: str, parsed_goal: str, info: None
180181
) -> list[TextualMemoryItem]:
181-
"""Process a single result into one or more TextualMemoryItems."""
182-
title = result.get("title", "")
183-
content = result.get("content", "")
184-
summary = result.get("summary", "")
182+
"""Process one Bocha search result into a list of TextualMemoryItems."""
183+
title = result.get("name", "")
184+
content = result.get("summary", "") or result.get("snippet", "")
185+
summary = result.get("snippet", "")
185186
url = result.get("url", "")
186-
publish_time = datetime.now().strftime("%Y-%m-%d")
187+
publish_time = result.get("datePublished", "")
188+
189+
if publish_time:
190+
try:
191+
publish_time = datetime.fromisoformat(publish_time.replace("Z", "+00:00")).strftime(
192+
"%Y-%m-%d"
193+
)
194+
except Exception:
195+
publish_time = datetime.now().strftime("%Y-%m-%d")
196+
else:
197+
publish_time = datetime.now().strftime("%Y-%m-%d")
187198

188199
# Use reader to split and process the content into chunks
189200
read_items = self.reader.get_memory([content], type="doc", info=info)

src/memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,21 @@ def from_config(
6464
max_results=config.max_results,
6565
num_per_request=config.num_per_request,
6666
)
67-
elif backend == "xinyu" or backend == "bocha":
67+
elif backend == "xinyu":
6868
return retriever_class(
6969
access_key=config.api_key, # Use api_key as access_key for xinyu
7070
search_engine_id=config.search_engine_id,
7171
embedder=embedder,
7272
reader=MemReaderFactory.from_config(config.reader),
7373
max_results=config.max_results,
7474
)
75+
elif backend == "bocha":
76+
return retriever_class(
77+
access_key=config.api_key, # Use api_key as access_key for bocha
78+
embedder=embedder,
79+
reader=MemReaderFactory.from_config(config.reader),
80+
max_results=config.max_results,
81+
)
7582
else:
7683
raise ValueError(f"Unsupported backend: {backend}")
7784

0 commit comments

Comments
 (0)