Skip to content

Commit ca0b721

Browse files
committed
enh: full context web search
1 parent 16ce8ab commit ca0b721

File tree

6 files changed

+93
-29
lines changed

6 files changed

+93
-29
lines changed

backend/open_webui/config.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1780,6 +1780,12 @@ class BannerModel(BaseModel):
17801780
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
17811781
)
17821782

1783+
RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
1784+
"RAG_WEB_SEARCH_FULL_CONTEXT",
1785+
"rag.web.search.full_context",
1786+
os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
1787+
)
1788+
17831789
# You can provide a list of your own websites to filter after performing a web search.
17841790
# This ensures the highest level of safety and reliability of the information sources.
17851791
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(

backend/open_webui/main.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@
179179
YOUTUBE_LOADER_PROXY_URL,
180180
# Retrieval (Web Search)
181181
RAG_WEB_SEARCH_ENGINE,
182+
RAG_WEB_SEARCH_FULL_CONTEXT,
182183
RAG_WEB_SEARCH_RESULT_COUNT,
183184
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
184185
RAG_WEB_SEARCH_TRUST_ENV,
@@ -548,6 +549,7 @@ async def lifespan(app: FastAPI):
548549

549550
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
550551
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
552+
app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
551553
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
552554

553555
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION

backend/open_webui/retrieval/utils.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,14 @@ def get_sources_from_files(
304304
relevant_contexts = []
305305

306306
for file in files:
307-
if file.get("context") == "full":
307+
if file.get("docs"):
308+
309+
print("file.get('docs')", file.get("docs"))
310+
context = {
311+
"documents": [[doc.get("content") for doc in file.get("docs")]],
312+
"metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
313+
}
314+
elif file.get("context") == "full":
308315
context = {
309316
"documents": [[file.get("file").get("data", {}).get("content")]],
310317
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],

backend/open_webui/routers/retrieval.py

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
371371
"proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
372372
},
373373
"web": {
374-
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
374+
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
375+
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
375376
"search": {
376377
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
377378
"drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
@@ -457,7 +458,8 @@ class WebSearchConfig(BaseModel):
457458

458459
class WebConfig(BaseModel):
459460
search: WebSearchConfig
460-
web_loader_ssl_verification: Optional[bool] = None
461+
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
462+
RAG_WEB_SEARCH_FULL_CONTEXT: Optional[bool] = None
461463

462464

463465
class ConfigUpdateForm(BaseModel):
@@ -512,11 +514,16 @@ async def update_rag_config(
512514
if form_data.web is not None:
513515
request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
514516
# Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
515-
form_data.web.web_loader_ssl_verification
517+
form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
516518
)
517519

518520
request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
519521
request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
522+
523+
request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = (
524+
form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT
525+
)
526+
520527
request.app.state.config.SEARXNG_QUERY_URL = (
521528
form_data.web.search.searxng_query_url
522529
)
@@ -600,7 +607,8 @@ async def update_rag_config(
600607
"translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
601608
},
602609
"web": {
603-
"web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
610+
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
611+
"RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
604612
"search": {
605613
"enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
606614
"engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
@@ -1349,21 +1357,36 @@ async def process_web_search(
13491357
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
13501358
)
13511359
docs = await loader.aload()
1352-
await run_in_threadpool(
1353-
save_docs_to_vector_db,
1354-
request,
1355-
docs,
1356-
collection_name,
1357-
overwrite=True,
1358-
user=user,
1359-
)
13601360

1361-
return {
1362-
"status": True,
1363-
"collection_name": collection_name,
1364-
"filenames": urls,
1365-
"loaded_count": len(docs),
1366-
}
1361+
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
1362+
return {
1363+
"status": True,
1364+
"docs": [
1365+
{
1366+
"content": doc.page_content,
1367+
"metadata": doc.metadata,
1368+
}
1369+
for doc in docs
1370+
],
1371+
"filenames": urls,
1372+
"loaded_count": len(docs),
1373+
}
1374+
else:
1375+
await run_in_threadpool(
1376+
save_docs_to_vector_db,
1377+
request,
1378+
docs,
1379+
collection_name,
1380+
overwrite=True,
1381+
user=user,
1382+
)
1383+
1384+
return {
1385+
"status": True,
1386+
"collection_name": collection_name,
1387+
"filenames": urls,
1388+
"loaded_count": len(docs),
1389+
}
13671390
except Exception as e:
13681391
log.exception(e)
13691392
raise HTTPException(

backend/open_webui/utils/middleware.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -362,14 +362,25 @@ async def chat_web_search_handler(
362362
)
363363

364364
files = form_data.get("files", [])
365-
files.append(
366-
{
367-
"collection_name": results["collection_name"],
368-
"name": searchQuery,
369-
"type": "web_search_results",
370-
"urls": results["filenames"],
371-
}
372-
)
365+
366+
if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
367+
files.append(
368+
{
369+
"docs": results.get("docs", []),
370+
"name": searchQuery,
371+
"type": "web_search_docs",
372+
"urls": results["filenames"],
373+
}
374+
)
375+
else:
376+
files.append(
377+
{
378+
"collection_name": results["collection_name"],
379+
"name": searchQuery,
380+
"type": "web_search_results",
381+
"urls": results["filenames"],
382+
}
383+
)
373384
form_data["files"] = files
374385
else:
375386
await event_emitter(

src/lib/components/admin/Settings/WebSearch.svelte

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import { onMount, getContext } from 'svelte';
77
import { toast } from 'svelte-sonner';
88
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
9+
import Tooltip from '$lib/components/common/Tooltip.svelte';
910
1011
const i18n = getContext('i18n');
1112
@@ -116,6 +117,19 @@
116117
</div>
117118
</div>
118119

120+
<div class=" py-0.5 flex w-full justify-between">
121+
<div class=" self-center text-xs font-medium">{$i18n.t('Full Context Mode')}</div>
122+
<div class="flex items-center relative">
123+
<Tooltip
124+
content={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT
125+
? 'Inject the entire web results as context for comprehensive processing, this is recommended for complex queries.'
126+
: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
127+
>
128+
<Switch bind:state={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT} />
129+
</Tooltip>
130+
</div>
131+
</div>
132+
119133
{#if webConfig.search.engine !== ''}
120134
<div class="mt-1.5">
121135
{#if webConfig.search.engine === 'searxng'}
@@ -424,12 +438,13 @@
424438
<button
425439
class="p-1 px-3 text-xs flex rounded-sm transition"
426440
on:click={() => {
427-
webConfig.web_loader_ssl_verification = !webConfig.web_loader_ssl_verification;
441+
webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION =
442+
!webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION;
428443
submitHandler();
429444
}}
430445
type="button"
431446
>
432-
{#if webConfig.web_loader_ssl_verification === false}
447+
{#if webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION === false}
433448
<span class="ml-2 self-center">{$i18n.t('On')}</span>
434449
{:else}
435450
<span class="ml-2 self-center">{$i18n.t('Off')}</span>

0 commit comments

Comments
 (0)