 from memos.chunkers.base import BaseChunker
 from memos.embedders.factory import OllamaEmbedder
 from memos.log import get_logger
-from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+from memos.mem_reader.base import BaseMemReader
+from memos.memories.textual.item import TextualMemoryItem
 
 
 logger = get_logger(__name__)
@@ -115,6 +116,7 @@ def __init__(
         search_engine_id: str,
         embedder: OllamaEmbedder,
         chunker: BaseChunker,
+        reader: BaseMemReader,
         max_results: int = 20,
     ):
         """
@@ -128,9 +130,10 @@ def __init__(
         self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
         self.embedder = embedder
         self.chunker = chunker
+        self.reader = reader
 
     def retrieve_from_internet(
-        self, query: str, top_k: int = 10, parsed_goal=None
+        self, query: str, top_k: int = 10, parsed_goal=None, info=None
     ) -> list[TextualMemoryItem]:
         """
         Retrieve information from Xinyu search and convert to TextualMemoryItem format
@@ -139,7 +142,7 @@ def retrieve_from_internet(
             query: Search query
             top_k: Number of results to return
             parsed_goal: Parsed task goal (optional)
-
+            info (dict): Optional metadata recording how the retrieved memories are consumed.
         Returns:
             List of TextualMemoryItem
         """
@@ -151,7 +154,7 @@ def retrieve_from_internet(
 
         with ThreadPoolExecutor(max_workers=8) as executor:
             futures = [
-                executor.submit(self._process_result, result, query, parsed_goal)
+                executor.submit(self._process_result, result, query, parsed_goal, info)
                 for result in search_results
             ]
             for future in as_completed(futures):
@@ -301,7 +304,7 @@ def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None
         return list(set(tags))[:15]  # Limit to 15 tags
 
     def _process_result(
-        self, result: dict, query: str, parsed_goal: str
+        self, result: dict, query: str, parsed_goal: str, info: dict
     ) -> list[TextualMemoryItem]:
         title = result.get("title", "")
         content = result.get("content", "")
@@ -318,55 +321,19 @@ def _process_result(
             publish_time = datetime.now().strftime("%Y-%m-%d")
         else:
             publish_time = datetime.now().strftime("%Y-%m-%d")
-        source = result.get("source", "")
-        site = result.get("site", "")
-        if site:
-            site = site.split("|")[0]
 
-        qualified_chunks = self._chunk(content)
+        read_items = self.reader.get_memory([content], type="doc", info=info)
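+        # Assumption: the reader returns one list of TextualMemoryItem per
+        # input document, so read_items[0] holds the items for `content`.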
 
         memory_items = []
-        for chunk_text, chunk_emb, score in qualified_chunks:
-            memory_content = (
+        for read_item_i in read_items[0]:
+            read_item_i.memory = (
                 f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
-                f"Content: {chunk_text}\nSource: {url}"
-            )
-            metadata = TreeNodeTextualMemoryMetadata(
-                user_id=None,
-                session_id=None,
-                status="activated",
-                type="fact",
-                source="web",
-                confidence=score,
-                entities=self._extract_entities(title, content, summary),
-                tags=self._extract_tags(title, content, summary, parsed_goal),
-                visibility="public",
-                memory_type="OuterMemory",
-                key=f"[{source}]" + title,
-                sources=[url] if url else [],
-                embedding=chunk_emb,
-                created_at=datetime.now().isoformat(),
-                usage=[],
-                background=f"Xinyu search result from {site or source}",
-            )
-            memory_items.append(
-                TextualMemoryItem(id=str(uuid.uuid4()), memory=memory_content, metadata=metadata)
+                f"Content: {read_item_i.memory}"
             )
+            read_item_i.metadata.source = "web"
+            read_item_i.metadata.memory_type = "OuterMemory"
+            read_item_i.metadata.sources = [url] if url else []
+            read_item_i.metadata.visibility = "public"
 
+            memory_items.append(read_item_i)
         return memory_items
-
-    def _chunk(self, content: str) -> list[tuple[str, list[float], float]]:
-        """
-        Use SentenceChunker to split content into chunks and embed each.
-
-        Returns:
-            List of (chunk_text, chunk_embedding, dummy_score)
-        """
-        chunks = self.chunker.chunk(content)
-        if not chunks:
-            return []
-
-        chunk_texts = [c.text for c in chunks]
-        chunk_embeddings = self.embedder.embed(chunk_texts)
-
-        return [(text, emb, 1.0) for text, emb in zip(chunk_texts, chunk_embeddings, strict=False)]
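
For readers skimming the diff, below is a runnable mimic of the new data flow in `_process_result`. Everything in it is local stand-in code: the `Fake*` classes are not memos types, and the `get_memory` contract (one inner list of items per input document) is an assumption inferred from the `read_items[0]` indexing above, not something this commit documents.

```python
from dataclasses import dataclass, field


@dataclass
class FakeMetadata:
    source: str = ""
    memory_type: str = ""
    sources: list = field(default_factory=list)
    visibility: str = ""


@dataclass
class FakeItem:
    memory: str
    metadata: FakeMetadata = field(default_factory=FakeMetadata)


class FakeReader:
    """Mimics the contract assumed of BaseMemReader.get_memory()."""

    def get_memory(self, docs, type="doc", info=None):
        # One inner list per input document -- the reason for read_items[0].
        return [[FakeItem(memory=doc)] for doc in docs]


reader = FakeReader()
url = "https://example.com/article"
read_items = reader.get_memory(["raw article text"], type="doc", info={})

memory_items = []
for item in read_items[0]:
    # Prepend search-result context, then tag the item as public web memory,
    # mirroring the loop introduced in this commit.
    item.memory = f"Title: t\nNewsTime: 2024-01-01\nSummary: s\nContent: {item.memory}"
    item.metadata.source = "web"
    item.metadata.memory_type = "OuterMemory"
    item.metadata.sources = [url] if url else []
    item.metadata.visibility = "public"
    memory_items.append(item)

print(memory_items[0].metadata)
```

The net effect of the commit, as the mimic shows, is that chunking plus per-chunk metadata construction is delegated to the injected reader, and `_process_result` only decorates the reader's items with web-specific fields.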