
Commit eeb00a5

chore: format
1 parent 2b913a9 commit eeb00a5

Some content is hidden: large commits have some content hidden by default, so only a subset of the changed files appears below.

60 files changed, +1548 -1210 lines changed

backend/open_webui/retrieval/web/utils.py

Lines changed: 36 additions & 32 deletions
@@ -15,15 +15,12 @@
     Optional,
     Sequence,
     Union,
-    Literal
+    Literal,
 )
 import aiohttp
 import certifi
 import validators
-from langchain_community.document_loaders import (
-    PlaywrightURLLoader,
-    WebBaseLoader
-)
+from langchain_community.document_loaders import PlaywrightURLLoader, WebBaseLoader
 from langchain_community.document_loaders.firecrawl import FireCrawlLoader
 from langchain_community.document_loaders.base import BaseLoader
 from langchain_core.documents import Document
@@ -33,7 +30,7 @@
     PLAYWRIGHT_WS_URI,
     RAG_WEB_LOADER_ENGINE,
     FIRECRAWL_API_BASE_URL,
-    FIRECRAWL_API_KEY
+    FIRECRAWL_API_KEY,
 )
 from open_webui.env import SRC_LOG_LEVELS
 
@@ -75,6 +72,7 @@ def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
             continue
     return valid_urls
 
+
 def resolve_hostname(hostname):
     # Get address information
     addr_info = socket.getaddrinfo(hostname, None)
@@ -85,16 +83,13 @@ def resolve_hostname(hostname):
 
     return ipv4_addresses, ipv6_addresses
 
+
 def extract_metadata(soup, url):
-    metadata = {
-        "source": url
-    }
+    metadata = {"source": url}
     if title := soup.find("title"):
         metadata["title"] = title.get_text()
     if description := soup.find("meta", attrs={"name": "description"}):
-        metadata["description"] = description.get(
-            "content", "No description found."
-        )
+        metadata["description"] = description.get("content", "No description found.")
     if html := soup.find("html"):
         metadata["language"] = html.get("lang", "No language found.")
     return metadata
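
The reformat collapses extract_metadata() into one-liners without changing behavior. A minimal standalone sketch of what it returns, assuming BeautifulSoup is installed; the sample HTML and URL are illustrative, not from the repo:

from bs4 import BeautifulSoup

html = (
    '<html lang="en"><head><title>Example</title>'
    '<meta name="description" content="A demo page."></head></html>'
)
soup = BeautifulSoup(html, "html.parser")

metadata = {"source": "https://example.com"}
if title := soup.find("title"):
    metadata["title"] = title.get_text()
if description := soup.find("meta", attrs={"name": "description"}):
    metadata["description"] = description.get("content", "No description found.")
if html_tag := soup.find("html"):
    metadata["language"] = html_tag.get("lang", "No language found.")

print(metadata)
# {'source': 'https://example.com', 'title': 'Example',
#  'description': 'A demo page.', 'language': 'en'}
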
@@ -104,7 +99,7 @@ def verify_ssl_cert(url: str) -> bool:
     """Verify SSL certificate for the given URL."""
     if not url.startswith("https://"):
         return True
-
+
     try:
         hostname = url.split("://")[-1].split("/")[0]
         context = ssl.create_default_context(cafile=certifi.where())
@@ -133,7 +128,7 @@ def __init__(
         params: Optional[Dict] = None,
     ):
         """Concurrent document loader for FireCrawl operations.
-
+
         Executes multiple FireCrawlLoader instances concurrently using thread pooling
         to improve bulk processing efficiency.
         Args:
@@ -142,7 +137,7 @@ def __init__(
             trust_env: If True, use proxy settings from environment variables.
             requests_per_second: Number of requests per second to limit to.
             continue_on_failure (bool): If True, continue loading other URLs on failure.
-            api_key: API key for FireCrawl service. Defaults to None
+            api_key: API key for FireCrawl service. Defaults to None
                 (uses FIRE_CRAWL_API_KEY environment variable if not provided).
             api_url: Base URL for FireCrawl API. Defaults to official API endpoint.
             mode: Operation mode selection:
@@ -154,15 +149,15 @@ def __init__(
                 Examples include crawlerOptions.
             For more details, visit: https://github.com/mendableai/firecrawl-py
         """
-        proxy_server = proxy.get('server') if proxy else None
+        proxy_server = proxy.get("server") if proxy else None
         if trust_env and not proxy_server:
             env_proxies = urllib.request.getproxies()
-            env_proxy_server = env_proxies.get('https') or env_proxies.get('http')
+            env_proxy_server = env_proxies.get("https") or env_proxies.get("http")
             if env_proxy_server:
                 if proxy:
-                    proxy['server'] = env_proxy_server
+                    proxy["server"] = env_proxy_server
                 else:
-                    proxy = { 'server': env_proxy_server }
+                    proxy = {"server": env_proxy_server}
         self.web_paths = web_paths
         self.verify_ssl = verify_ssl
         self.requests_per_second = requests_per_second
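
The proxy block reformatted above implements a fallback: an explicit proxy["server"] wins; otherwise, when trust_env is set, the https_proxy/http_proxy environment variables are consulted. The same logic as a standalone sketch; the helper name resolve_proxy_config is ours, not part of the diff:

# The function name resolve_proxy_config is hypothetical, not from the diff.
import urllib.request
from typing import Dict, Optional


def resolve_proxy_config(
    proxy: Optional[Dict[str, str]], trust_env: bool
) -> Optional[Dict[str, str]]:
    """Prefer an explicit proxy; else fall back to HTTPS_PROXY/HTTP_PROXY."""
    proxy_server = proxy.get("server") if proxy else None
    if trust_env and not proxy_server:
        env_proxies = urllib.request.getproxies()
        env_proxy_server = env_proxies.get("https") or env_proxies.get("http")
        if env_proxy_server:
            if proxy:
                proxy["server"] = env_proxy_server
            else:
                proxy = {"server": env_proxy_server}
    return proxy


print(resolve_proxy_config(None, trust_env=True))  # None unless *_PROXY is set
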
@@ -184,7 +179,7 @@ def lazy_load(self) -> Iterator[Document]:
                     api_key=self.api_key,
                     api_url=self.api_url,
                     mode=self.mode,
-                    params=self.params
+                    params=self.params,
                 )
                 yield from loader.lazy_load()
             except Exception as e:
@@ -203,7 +198,7 @@ async def alazy_load(self):
                     api_key=self.api_key,
                     api_url=self.api_url,
                     mode=self.mode,
-                    params=self.params
+                    params=self.params,
                 )
                 async for document in loader.alazy_load():
                     yield document
@@ -251,7 +246,7 @@ def _safe_process_url_sync(self, url: str) -> bool:
 
 class SafePlaywrightURLLoader(PlaywrightURLLoader):
     """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection.
-
+
     Attributes:
         web_paths (List[str]): List of URLs to load.
         verify_ssl (bool): If True, verify SSL certificates.
@@ -273,27 +268,27 @@ def __init__(
         headless: bool = True,
         remove_selectors: Optional[List[str]] = None,
         proxy: Optional[Dict[str, str]] = None,
-        playwright_ws_url: Optional[str] = None
+        playwright_ws_url: Optional[str] = None,
     ):
         """Initialize with additional safety parameters and remote browser support."""
 
-        proxy_server = proxy.get('server') if proxy else None
+        proxy_server = proxy.get("server") if proxy else None
         if trust_env and not proxy_server:
             env_proxies = urllib.request.getproxies()
-            env_proxy_server = env_proxies.get('https') or env_proxies.get('http')
+            env_proxy_server = env_proxies.get("https") or env_proxies.get("http")
             if env_proxy_server:
                 if proxy:
-                    proxy['server'] = env_proxy_server
+                    proxy["server"] = env_proxy_server
                 else:
-                    proxy = { 'server': env_proxy_server }
+                    proxy = {"server": env_proxy_server}
 
         # We'll set headless to False if using playwright_ws_url since it's handled by the remote browser
         super().__init__(
             urls=web_paths,
             continue_on_failure=continue_on_failure,
             headless=headless if playwright_ws_url is None else False,
             remove_selectors=remove_selectors,
-            proxy=proxy
+            proxy=proxy,
         )
         self.verify_ssl = verify_ssl
         self.requests_per_second = requests_per_second
@@ -339,7 +334,9 @@ async def alazy_load(self) -> AsyncIterator[Document]:
             if self.playwright_ws_url:
                 browser = await p.chromium.connect(self.playwright_ws_url)
             else:
-                browser = await p.chromium.launch(headless=self.headless, proxy=self.proxy)
+                browser = await p.chromium.launch(
+                    headless=self.headless, proxy=self.proxy
+                )
 
             for url in self.urls:
                 try:
@@ -394,6 +391,7 @@ def _safe_process_url_sync(self, url: str) -> bool:
         self._sync_wait_for_rate_limit()
         return True
 
+
 class SafeWebBaseLoader(WebBaseLoader):
     """WebBaseLoader with enhanced error handling for URLs."""
 
@@ -496,11 +494,13 @@ async def aload(self) -> list[Document]:
        """Load data into Document objects."""
         return [document async for document in self.alazy_load()]
 
+
 RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader)
 RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader
 RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader
 RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader
 
+
 def get_web_loader(
     urls: Union[str, Sequence[str]],
     verify_ssl: bool = True,
@@ -515,7 +515,7 @@ def get_web_loader(
         "verify_ssl": verify_ssl,
         "requests_per_second": requests_per_second,
         "continue_on_failure": True,
-        "trust_env": trust_env
+        "trust_env": trust_env,
     }
 
     if PLAYWRIGHT_WS_URI.value:
@@ -529,6 +529,10 @@ def get_web_loader(
     WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value]
     web_loader = WebLoaderClass(**web_loader_args)
 
-    log.debug("Using RAG_WEB_LOADER_ENGINE %s for %s URLs", web_loader.__class__.__name__, len(safe_urls))
+    log.debug(
+        "Using RAG_WEB_LOADER_ENGINE %s for %s URLs",
+        web_loader.__class__.__name__,
+        len(safe_urls),
+    )
 
-    return web_loader
+    return web_loader
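
A note on the engine table near the end of this file: because RAG_WEB_LOADER_ENGINES is a defaultdict, an unrecognized RAG_WEB_LOADER_ENGINE value silently falls back to SafeWebBaseLoader instead of raising KeyError. A toy sketch, with stub classes standing in for the real loaders:

# Stub classes are placeholders; the real loaders live in this module.
from collections import defaultdict


class SafeWebBaseLoader: ...


class SafePlaywrightURLLoader: ...


class SafeFireCrawlLoader: ...


RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader)
RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader
RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader
RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader

print(RAG_WEB_LOADER_ENGINES["playwright"].__name__)   # SafePlaywrightURLLoader
print(RAG_WEB_LOADER_ENGINES["typo_engine"].__name__)  # SafeWebBaseLoader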

backend/open_webui/routers/audio.py

Lines changed: 12 additions & 6 deletions
@@ -267,8 +267,10 @@ async def speech(request: Request, user=Depends(get_verified_user)):
 
         try:
             # print(payload)
-            timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
-            async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+            async with aiohttp.ClientSession(
+                timeout=timeout, trust_env=True
+            ) as session:
                 async with session.post(
                     url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
                     json=payload,
@@ -325,8 +327,10 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             )
 
         try:
-            timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
-            async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+            async with aiohttp.ClientSession(
+                timeout=timeout, trust_env=True
+            ) as session:
                 async with session.post(
                     f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
                     json={
@@ -383,8 +387,10 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             data = f"""<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{locale}">
                 <voice name="{language}">{payload["input"]}</voice>
             </speak>"""
-            timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
-            async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+            async with aiohttp.ClientSession(
+                timeout=timeout, trust_env=True
+            ) as session:
                 async with session.post(
                     f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
                     headers={
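
All three hunks reformat the same pattern: one aiohttp.ClientTimeout applied to the whole session, with trust_env=True so proxy environment variables are honored. A minimal sketch of that pattern; the URL, payload, and 300-second default are illustrative values, not taken from the diff:

import asyncio

import aiohttp


async def post_json(url: str, payload: dict, total_seconds: float = 300) -> bytes:
    # One timeout budget for the whole request; trust_env picks up *_PROXY vars.
    timeout = aiohttp.ClientTimeout(total=total_seconds)
    async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
        async with session.post(url, json=payload) as resp:
            resp.raise_for_status()
            return await resp.read()


if __name__ == "__main__":
    body = asyncio.run(post_json("https://httpbin.org/post", {"input": "hello"}))
    print(len(body), "bytes")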

backend/open_webui/routers/auths.py

Lines changed: 1 addition & 1 deletion
@@ -547,7 +547,7 @@ async def signout(request: Request, response: Response):
             response.delete_cookie("oauth_id_token")
             return RedirectResponse(
                 headers=response.headers,
-                url=f"{logout_url}?id_token_hint={oauth_id_token}"
+                url=f"{logout_url}?id_token_hint={oauth_id_token}",
             )
         else:
             raise HTTPException(

backend/open_webui/routers/ollama.py

Lines changed: 6 additions & 2 deletions
@@ -944,8 +944,12 @@ class ChatMessage(BaseModel):
     @classmethod
     def check_at_least_one_field(cls, field_value, values, **kwargs):
         # Raise an error if both 'content' and 'tool_calls' are None
-        if field_value is None and ("tool_calls" not in values or values["tool_calls"] is None):
-            raise ValueError("At least one of 'content' or 'tool_calls' must be provided")
+        if field_value is None and (
+            "tool_calls" not in values or values["tool_calls"] is None
+        ):
+            raise ValueError(
+                "At least one of 'content' or 'tool_calls' must be provided"
+            )
 
         return field_value
 
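The reformatted validator enforces that a message carries content or tool_calls (or both). A runnable sketch assuming pydantic v1-style validators; the field declarations are our guess at the surrounding model, with tool_calls declared before content so it is already present in values when the content validator runs:

from typing import List, Optional

from pydantic import BaseModel, validator


class ChatMessage(BaseModel):
    role: str
    # Guessed fields: declared before `content` because pydantic v1
    # validates in declaration order, so `values` already holds tool_calls.
    tool_calls: Optional[List[dict]] = None
    content: Optional[str] = None

    @validator("content", always=True)
    def check_at_least_one_field(cls, field_value, values, **kwargs):
        # Raise an error if both 'content' and 'tool_calls' are None
        if field_value is None and (
            "tool_calls" not in values or values["tool_calls"] is None
        ):
            raise ValueError(
                "At least one of 'content' or 'tool_calls' must be provided"
            )
        return field_value


ChatMessage(role="assistant", tool_calls=[{"id": "call_0"}])  # passes
# ChatMessage(role="assistant")  # raises a ValidationError
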
backend/open_webui/utils/oauth.py

Lines changed: 17 additions & 8 deletions
@@ -253,23 +253,32 @@ async def handle_callback(self, request, provider, response):
         if provider == "github":
             try:
                 access_token = token.get("access_token")
-                headers = {
-                    "Authorization": f"Bearer {access_token}"
-                }
+                headers = {"Authorization": f"Bearer {access_token}"}
                 async with aiohttp.ClientSession() as session:
-                    async with session.get("https://api.github.com/user/emails", headers=headers) as resp:
+                    async with session.get(
+                        "https://api.github.com/user/emails", headers=headers
+                    ) as resp:
                         if resp.ok:
                             emails = await resp.json()
                             # use the primary email as the user's email
-                            primary_email = next((e["email"] for e in emails if e.get("primary")), None)
+                            primary_email = next(
+                                (e["email"] for e in emails if e.get("primary")),
+                                None,
+                            )
                             if primary_email:
                                 email = primary_email
                             else:
-                                log.warning("No primary email found in GitHub response")
-                                raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED)
+                                log.warning(
+                                    "No primary email found in GitHub response"
+                                )
+                                raise HTTPException(
+                                    400, detail=ERROR_MESSAGES.INVALID_CRED
+                                )
                         else:
                             log.warning("Failed to fetch GitHub email")
-                            raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED)
+                            raise HTTPException(
+                                400, detail=ERROR_MESSAGES.INVALID_CRED
+                            )
             except Exception as e:
                 log.warning(f"Error fetching GitHub email: {e}")
                 raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED)
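
The reformatted block queries GitHub's /user/emails endpoint and keeps the address flagged primary. The same flow as a standalone sketch, with FastAPI's HTTPException swapped for a plain ValueError so it runs outside the router; the token needs the user:email scope:

import aiohttp


async def fetch_github_primary_email(access_token: str) -> str:
    headers = {"Authorization": f"Bearer {access_token}"}
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://api.github.com/user/emails", headers=headers
        ) as resp:
            if not resp.ok:
                raise ValueError("Failed to fetch GitHub email")
            emails = await resp.json()
            # GitHub marks one address per account as primary
            primary_email = next(
                (e["email"] for e in emails if e.get("primary")), None
            )
            if primary_email is None:
                raise ValueError("No primary email found in GitHub response")
            return primary_email


# asyncio.run(fetch_github_primary_email("<token with user:email scope>"))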

backend/open_webui/utils/payload.py

Lines changed: 10 additions & 6 deletions
@@ -151,7 +151,7 @@ def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]:
 
             # Put the content to empty string (Ollama requires an empty string for tool calls)
             new_message["content"] = ""
-
+
         else:
             # Otherwise, assume the content is a list of dicts, e.g., text followed by an image URL
             content_text = ""
@@ -215,16 +215,20 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict:
     if openai_payload.get("options"):
         ollama_payload["options"] = openai_payload["options"]
         ollama_options = openai_payload["options"]
-
+
         # Re-Mapping OpenAI's `max_tokens` -> Ollama's `num_predict`
         if "max_tokens" in ollama_options:
-            ollama_options["num_predict"] = ollama_options["max_tokens"]
-            del ollama_options["max_tokens"] # To prevent Ollama warning of invalid option provided
+            ollama_options["num_predict"] = ollama_options["max_tokens"]
+            del ollama_options[
+                "max_tokens"
+            ]  # To prevent Ollama warning of invalid option provided
 
         # Ollama lacks a "system" prompt option. It has to be provided as a direct parameter, so we copy it down.
         if "system" in ollama_options:
-            ollama_payload["system"] = ollama_options["system"]
-            del ollama_options["system"] # To prevent Ollama warning of invalid option provided
+            ollama_payload["system"] = ollama_options["system"]
+            del ollama_options[
+                "system"
+            ]  # To prevent Ollama warning of invalid option provided
 
     if "metadata" in openai_payload:
         ollama_payload["metadata"] = openai_payload["metadata"]
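
Both hunks reformat the same remapping: OpenAI's max_tokens becomes Ollama's num_predict, and system is hoisted from options to a top-level key, each deleted afterwards so Ollama does not warn about unknown options. A distilled sketch of just that logic; the helper name remap_options is ours, and the real converter also fills in messages and other top-level keys:

def remap_options(openai_payload: dict) -> dict:
    ollama_payload: dict = {}
    if openai_payload.get("options"):
        ollama_payload["options"] = openai_payload["options"]
        ollama_options = openai_payload["options"]

        # OpenAI's max_tokens -> Ollama's num_predict
        if "max_tokens" in ollama_options:
            ollama_options["num_predict"] = ollama_options["max_tokens"]
            del ollama_options["max_tokens"]  # avoid Ollama's invalid-option warning

        # Ollama takes "system" as a top-level parameter, not an option
        if "system" in ollama_options:
            ollama_payload["system"] = ollama_options["system"]
            del ollama_options["system"]  # avoid Ollama's invalid-option warning
    return ollama_payload


print(remap_options({"options": {"max_tokens": 128, "system": "Be terse."}}))
# {'options': {'num_predict': 128}, 'system': 'Be terse.'}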
