@@ -8,6 +8,8 @@ import kotlinx.coroutines.withContext
88import kotlinx.serialization.SerialName
99import kotlinx.serialization.Serializable
1010import org.jsoup.Jsoup
11+ import java.net.URI
12+ import java.net.URLDecoder
1113import java.net.URLEncoder
1214import java.nio.charset.StandardCharsets
1315import java.time.LocalDate
@@ -62,7 +64,6 @@ IMPORTANT - Use the correct year in search queries:
6264 hookManager = hookManager,
6365 sessionId = sessionId,
6466) {
65-
6667 @Serializable
6768 data class Args (
6869 @property:LLMDescription(
@@ -96,38 +97,19 @@ IMPORTANT - Use the correct year in search queries:
9697 )
9798
9899 override suspend fun doExecute (args : Args ): Result = withContext(Dispatchers .IO ) {
99- try {
100- val searxResults = searchWithSearxNG(args.query)
101- if (searxResults.isNotEmpty()) {
102- val filteredResults =
103- filterResults(
104- searxResults,
105- args.allowedDomains,
106- args.blockedDomains
107- )
108- return @withContext Result (
109- query = args.query,
110- results = filteredResults.take(10 ),
111- sources = filteredResults.take(10 ).map { " [${it.title} ](${it.url} )" }
112- )
113- }
114- } catch (_: Exception ) {
115- // Fall back to DuckDuckGo
116- }
117-
118100 try {
119101 val duckduckgoResults = searchWithDuckDuckGo(args.query)
120- val filteredResults =
121- filterResults(
122- duckduckgoResults,
123- args.allowedDomains,
124- args.blockedDomains
125- )
126- return @withContext Result (
102+ val filteredResults = filterResults(
103+ duckduckgoResults,
104+ args.allowedDomains,
105+ args.blockedDomains
106+ ).take(10 )
107+ val result = Result (
127108 query = args.query,
128- results = filteredResults.take( 10 ) ,
129- sources = filteredResults.take( 10 ). map { " [${it.title} ](${it.url} )" }
109+ results = filteredResults,
110+ sources = filteredResults.map { " [${it.title} ](${it.url} )" }
130111 )
112+ return @withContext result
131113 } catch (_: Exception ) {
132114 return @withContext Result (
133115 query = args.query,
@@ -148,34 +130,6 @@ IMPORTANT - Use the correct year in search queries:
148130 )
149131 }
150132
151- private suspend fun searchWithSearxNG (query : String ): List <SearchResult > =
152- withContext(Dispatchers .IO ) {
153- val encodedQuery = URLEncoder .encode(query, StandardCharsets .UTF_8 .toString())
154- val url = " https://searx.space/search?q=$encodedQuery &format=json"
155-
156- val doc = Jsoup .connect(url)
157- .userAgent(userAgent)
158- .timeout(10000 )
159- .ignoreContentType(true )
160- .get()
161-
162- val jsonResponse = doc.body().text()
163- if (jsonResponse.isEmpty()) return @withContext emptyList()
164-
165- try {
166- val searxResponse = json.decodeFromString<SearxResponse >(jsonResponse)
167- searxResponse.results.map { result ->
168- SearchResult (
169- title = result.title,
170- url = result.url,
171- content = result.content
172- )
173- }
174- } catch (_: Exception ) {
175- emptyList()
176- }
177- }
178-
179133 private suspend fun searchWithDuckDuckGo (query : String ): List <SearchResult > =
180134 withContext(Dispatchers .IO ) {
181135 val encodedQuery = URLEncoder .encode(query, StandardCharsets .UTF_8 .toString())
@@ -193,10 +147,11 @@ IMPORTANT - Use the correct year in search queries:
193147 val snippet = resultDiv.selectFirst(" a.result__snippet" )
194148
195149 if (titleLink != null ) {
150+ val resolvedUrl = normalizeSearchResultUrl(titleLink.attr(" href" ))
196151 results.add(
197152 SearchResult (
198153 title = titleLink.text(),
199- url = titleLink.attr( " href " ) ,
154+ url = resolvedUrl ,
200155 content = snippet?.text() ? : " "
201156 )
202157 )
@@ -211,21 +166,61 @@ IMPORTANT - Use the correct year in search queries:
211166 allowedDomains : List <String >? ,
212167 blockedDomains : List <String >?
213168 ): List <SearchResult > {
169+ val effectiveAllowedDomains = normalizeDomains(allowedDomains)
170+ val effectiveBlockedDomains = normalizeDomains(blockedDomains)
214171 return results.filter { result ->
215172 val urlLower = result.url.lowercase()
216173
217- blockedDomains ?.any { domain ->
174+ effectiveBlockedDomains ?.any { domain ->
218175 urlLower.contains(domain.lowercase())
219176 }?.let { if (it) return @filter false }
220177
221- allowedDomains ?.let { allowed ->
178+ effectiveAllowedDomains ?.let { allowed ->
222179 allowed.any { domain ->
223180 urlLower.contains(domain.lowercase())
224181 }
225182 } ? : true
226183 }
227184 }
228185
186+ companion object {
/**
 * Converts a raw result link into the URL of the page it ultimately points to.
 *
 * Rules, in order:
 * - A blank [url] is returned unchanged.
 * - A protocol-relative link (`//host/...`) is prefixed with `https:`.
 * - A link whose host is `duckduckgo.com` or `www.duckduckgo.com` and whose query string
 *   carries a non-blank `uddg` parameter is replaced by the percent-decoded value of that
 *   parameter.
 * - Every other link, including one that cannot be parsed or decoded, is returned as the
 *   (possibly `https:`-prefixed) input.
 *
 * @param url link value as found in the result markup.
 * @return the unwrapped target URL, or the `https:`-prefixed/original link when there is
 *   nothing to unwrap.
 */
internal fun normalizeSearchResultUrl(url: String): String {
    if (url.isBlank()) return url

    val absoluteUrl = if (url.startsWith("//")) "https:$url" else url

    // Parsing and decoding are best-effort: any failure keeps the link as it was.
    return runCatching { extractRedirectTarget(absoluteUrl) }.getOrNull() ?: absoluteUrl
}

/**
 * Returns the decoded `uddg` target of a DuckDuckGo-hosted link, or `null` when
 * [absoluteUrl] is not hosted on DuckDuckGo or has no usable `uddg` parameter.
 * May throw on malformed input; the caller treats that as "no target".
 */
private fun extractRedirectTarget(absoluteUrl: String): String? {
    val uri = URI(absoluteUrl)
    val host = uri.host?.lowercase()
    if (host != "duckduckgo.com" && host != "www.duckduckgo.com") return null

    val rawQuery = uri.rawQuery ?: return null
    return rawQuery.split("&").firstNotNullOfOrNull { segment ->
        if (!segment.startsWith("uddg=")) return@firstNotNullOfOrNull null
        // The charset is passed by name, like the URLEncoder calls elsewhere in this file;
        // the Charset-typed overload only exists on newer Java runtimes.
        URLDecoder.decode(segment.removePrefix("uddg="), StandardCharsets.UTF_8.name())
            // An empty `uddg=` must not turn the result URL into an empty string.
            .takeIf { it.isNotBlank() }
    }
}
215+
/**
 * Tidies a caller-supplied domain list.
 *
 * Each entry is trimmed and entries that end up empty are dropped. The result is `null`
 * when [domains] is `null` or when no entry survives.
 *
 * @param domains raw domain entries, possibly `null`.
 * @return the trimmed, non-empty entries in their original order, or `null` if there are none.
 */
internal fun normalizeDomains(domains: List<String>?): List<String>? {
    if (domains == null) return null

    val cleaned = domains.mapNotNull { raw ->
        val trimmed = raw.trim()
        if (trimmed.isEmpty()) null else trimmed
    }
    return if (cleaned.isEmpty()) null else cleaned
}
222+ }
223+
229224 override fun encodeResultToString (result : Result ): String =
230225 buildString {
231226 if (result.results.isEmpty()) {
@@ -243,16 +238,4 @@ IMPORTANT - Use the correct year in search queries:
243238 appendLine()
244239 appendLine(" *Click on any result to view the full content*" )
245240 }.trimEnd().truncateToolResult()
246-
247- @Serializable
248- private data class SearxResponse (
249- val results : List <SearxResult >
250- )
251-
252- @Serializable
253- private data class SearxResult (
254- val title : String ,
255- val url : String ,
256- val content : String
257- )
258241}
0 commit comments