44# Licensed under the MIT License. See License.txt in the project root for license information.
55# ======================================================================================================================
66import json
7+ import re
8+ from concurrent .futures import ThreadPoolExecutor , as_completed
79from dataclasses import dataclass
810from typing import Dict , List , Optional , Sequence
911from linkup import LinkupClient
@@ -69,12 +71,158 @@ def _get_max_results_per_provider() -> int:
6971 pass
7072
7173
@value('internet-search.summary-length')
def _get_max_summary_length() -> int:
    # Accessor for the 'internet-search.summary-length' setting; _extract_summary uses the
    # returned number as the maximum summary length in characters.
    # NOTE(review): the body is intentionally empty - presumably the `@value` decorator
    # supplies the configured value at call time; confirm against the plugin framework.
    pass
77+
78+
7279def _truncate (text : str , max_chars : int ) -> str :
7380 if len (text ) <= max_chars :
7481 return text
7582 return text [: max_chars - 1 ].rstrip () + "…"
7683
7784
85+ def _extract_summary (text : str , max_sentences : int = 4 ) -> str :
86+ """
87+ 从文本中提取前几句话作为摘要
88+
89+ Args:
90+ text: 原始文本
91+ max_sentences: 最多保留的句子数,默认为4句
92+
93+ Returns:
94+ 摘要文本
95+ """
96+ if not text :
97+ return ""
98+
99+ # 使用正则表达式匹配句子结束符号
100+ sentences = re .split (r'([。!?\.!?]+["\'»\)]?\s*)' , text )
101+
102+ # 重新组合句子(将分隔符和句子内容合并)
103+ combined_sentences = []
104+ for i in range (0 , len (sentences ) - 1 , 2 ):
105+ sentence = sentences [i ]
106+ separator = sentences [i + 1 ] if i + 1 < len (sentences ) else ""
107+ combined = (sentence + separator ).strip ()
108+ if combined :
109+ combined_sentences .append (combined )
110+
111+ # 如果最后一个元素没有分隔符
112+ if len (sentences ) % 2 == 1 and sentences [- 1 ].strip ():
113+ combined_sentences .append (sentences [- 1 ].strip ())
114+
115+ # 取前 max_sentences 句
116+ if len (combined_sentences ) <= max_sentences :
117+ summary = " " .join (combined_sentences )
118+ else :
119+ summary = " " .join (combined_sentences [:max_sentences ])
120+
121+ # 确保摘要不会过长(最多150字符)
122+ if len (summary ) > _get_max_summary_length ():
123+ summary = summary [:(_get_max_summary_length () - 3 )].rstrip () + "..."
124+
125+ return summary
126+
127+
def _search_exa(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Exa and convert the hits into ``SearchItem`` objects.

    Each hit's text is truncated to ``max_snippet_chars`` and reduced to a short summary, which
    is stored both as the item text and under the "summary" metadata key. Any exception is
    logged as a warning and the items collected so far are returned.
    """
    collected: List[SearchItem] = []
    try:
        response = Exa(api_key=api_key).search_and_contents(
            query,
            text={"max_characters": 2000},
            livecrawl="always",
            num_results=max_results,
        )
        hits = getattr(response, "results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = getattr(hit, "text", "") or getattr(hit, "content", "") or ""
            # Keep only the leading sentences of the truncated snippet.
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = getattr(hit, "id", "") or f"exa_{index}"
            metadata = {
                "fileName": getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "exa",
                "published_date": getattr(hit, "published_date", None),
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
    return collected
159+
160+
def _search_tavily(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Tavily and convert the hits into ``SearchItem`` objects.

    Each hit's "content" is truncated to ``max_snippet_chars`` and reduced to a short summary,
    which is stored both as the item text and under the "summary" metadata key. Any exception
    is logged as a warning and the items collected so far are returned.
    """
    collected: List[SearchItem] = []
    try:
        response = TavilyClient(api_key=api_key).search(
            query=query,
            max_results=max_results,
            include_images=False,
        )
        hits = response.get("results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = hit.get("content", "") or ""
            # Keep only the leading sentences of the truncated snippet.
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = hit.get("id", "") or f"tavily_{index}"
            metadata = {
                "fileName": hit.get("title", "") or "",
                "url": hit.get("url", "") or "",
                "source": "tavily",
                "published_date": hit.get("published_date"),
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
    return collected
191+
192+
def _search_linkup(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Linkup and convert the hits into ``SearchItem`` objects.

    At most ``max_results`` hits are used. Each hit's text is truncated to ``max_snippet_chars``
    and reduced to a short summary, which is stored both as the item text and under the
    "summary" metadata key. Any exception is logged as a warning and the items collected so far
    are returned.
    """
    collected: List[SearchItem] = []
    try:
        response = LinkupClient(api_key=api_key).search(
            query=query,
            depth="standard",
            output_type="searchResults",
            include_images=False,
        )
        # The result cap is applied here, on the returned list.
        hits = getattr(response, "results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = getattr(hit, "content", "") or getattr(hit, "text", "") or ""
            # Keep only the leading sentences of the truncated snippet.
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = getattr(hit, "id", "") or f"linkup_{index}"
            metadata = {
                "fileName": getattr(hit, "name", None) or getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "linkup",
                "published_date": None,
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
    return collected
224+
225+
78226def _internet_search (
79227 query : str ,
80228 api_keys : Dict [str , str ],
@@ -88,102 +236,44 @@ def _internet_search(
88236 for name in ("exa" , "tavily" , "linkup" ):
89237 if api_keys .get (name ):
90238 selected .append (name )
91- items : List [SearchItem ] = []
92- errors = [] # 记录失败的搜索工具
93239
94- # Exa
240+ # 准备并行搜索任务
241+ search_tasks = []
95242 if "exa" in selected and api_keys .get ("exa" ):
96- try :
97- exa_client = Exa (api_key = api_keys ["exa" ])
98- res = exa_client .search_and_contents (
99- query ,
100- text = {"max_characters" : 2000 },
101- livecrawl = "always" ,
102- num_results = max_results_per_provider ,
103- )
104- for i , r in enumerate (getattr (res , "results" , [])[:max_results_per_provider ]):
105- text = _truncate (getattr (r , "text" , "" ) or getattr (r , "content" , "" ) or "" , max_snippet_chars )
106- items .append (
107- SearchItem (
108- id = getattr (r , "id" , "" ) or f"exa_{ i } " ,
109- text = text ,
110- score = 12.0 , # 使用float确保序列化
111- metadata = {
112- "fileName" : getattr (r , "title" , "" ) or "" ,
113- "url" : getattr (r , "url" , "" ) or "" ,
114- "source" : "exa" ,
115- "published_date" : getattr (r , "published_date" , None ),
116- "summary" : text ,
117- }
118- )
119- )
120- except Exception as e :
121- sys_plugin_logger .warning (f'Failed to search in Exa tool: { str (e )} ' )
122- errors .append ("exa" )
123-
124- # Tavily
243+ search_tasks .append (("exa" , _search_exa , api_keys ["exa" ]))
125244 if "tavily" in selected and api_keys .get ("tavily" ):
126- try :
127- tavily_client = TavilyClient (api_key = api_keys ["tavily" ])
128- res = tavily_client .search (
129- query = query ,
130- max_results = max_results_per_provider ,
131- include_images = False ,
132- )
133- for i , r in enumerate (res .get ("results" , [])[:max_results_per_provider ]):
134- text = _truncate (r .get ("content" , "" ) or "" , max_snippet_chars )
135- items .append (
136- SearchItem (
137- id = r .get ("id" , "" ) or f"tavily_{ i } " ,
138- text = text ,
139- score = 12.0 ,
140- metadata = {
141- "fileName" : r .get ("title" , "" ) or "" ,
142- "url" : r .get ("url" , "" ) or "" ,
143- "source" : "tavily" ,
144- "published_date" : r .get ("published_date" ),
145- "summary" : text ,
146- }
147- )
148- )
149- except Exception as e :
150- sys_plugin_logger .warning (f'Failed to search in Tavily tool: { str (e )} ' )
151- errors .append ("tavily" )
152-
153- # Linkup
245+ search_tasks .append (("tavily" , _search_tavily , api_keys ["tavily" ]))
154246 if "linkup" in selected and api_keys .get ("linkup" ):
155- try :
156- linkup_client = LinkupClient (api_key = api_keys ["linkup" ])
157- resp = linkup_client .search (
158- query = query ,
159- depth = "standard" ,
160- output_type = "searchResults" ,
161- include_images = False ,
162- )
163- for i , r in enumerate (getattr (resp , "results" , [])[:max_results_per_provider ]):
164- text = _truncate (getattr (r , "content" , "" ) or getattr (r , "text" , "" ) or "" , max_snippet_chars )
165- items .append (
166- SearchItem (
167- id = getattr (r , "id" , "" ) or f"linkup_{ i } " ,
168- text = text ,
169- score = 12.0 ,
170- metadata = {
171- "fileName" : getattr (r , "name" , None ) or getattr (r , "title" , "" ) or "" ,
172- "url" : getattr (r , "url" , "" ) or "" ,
173- "source" : "linkup" ,
174- "published_date" : None ,
175- "summary" : text ,
176- }
177- )
178- )
179- except Exception as e :
180- sys_plugin_logger .warning (f'Failed to search in Linkup tool: { str (e )} ' )
181- errors .append ("linkup" )
182-
247+ search_tasks .append (("linkup" , _search_linkup , api_keys ["linkup" ]))
248+
249+ # 使用线程池并行执行搜索
250+ items : List [SearchItem ] = []
251+ errors = []
252+
253+ with ThreadPoolExecutor (max_workers = len (search_tasks )) as executor :
254+ # 提交所有搜索任务
255+ future_to_provider = {
256+ executor .submit (task_func , query , api_key , max_results_per_provider , max_snippet_chars ): provider_name
257+ for provider_name , task_func , api_key in search_tasks
258+ }
259+
260+ # 收集结果
261+ for future in as_completed (future_to_provider ):
262+ provider_name = future_to_provider [future ]
263+ try :
264+ results = future .result ()
265+ if results :
266+ items .extend (results )
267+ else :
268+ errors .append (provider_name )
269+ except Exception as e :
270+ sys_plugin_logger .error (f'Unexpected error in { provider_name } search: { str (e )} ' )
271+ errors .append (provider_name )
272+
183273 # 如果所有搜索都失败了,才抛出异常
184274 if not items and errors :
185275 raise FitException (
186- InternalErrorCode .CLIENT_ERROR ,
276+ InternalErrorCode .CLIENT_ERROR ,
187277 f'All search tools failed: { ", " .join (errors )} '
188278 )
189279
0 commit comments