Skip to content

Commit 15485e7

Browse files
authored
Merge pull request open-webui#10469 from open-webui/dev
0.5.17
2 parents 6fedd72 + 8241fa2 commit 15485e7

File tree

138 files changed

+5226
-3072
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

138 files changed

+5226
-3072
lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.5.17] - 2025-02-27
9+
10+
### Added
11+
12+
- **🚀 Instant Document Upload with Bypass Embedding & Retrieval**: Admins can now enable "Bypass Embedding & Retrieval" in Admin Settings > Documents, significantly speeding up document uploads and ensuring full document context is retained without chunking.
13+
- **🔎 "Stream" Hook for Real-Time Filtering**: The new "stream" hook allows dynamic real-time message filtering. Learn more in our documentation (https://docs.openwebui.com/features/plugin/functions/filter).
14+
- **☁️ OneDrive Integration**: Early support for OneDrive storage integration has been introduced, expanding file import options.
15+
- **📈 Enhanced Logging with Loguru**: Backend logging has been improved with Loguru, making debugging and issue tracking far more efficient.
16+
- **⚙️ General Stability Enhancements**: Backend and frontend refactoring improves performance, ensuring a smoother and more reliable user experience.
17+
- **🌍 Updated Translations**: Refined multilingual support for better localization and accuracy across various languages.
18+
19+
### Fixed
20+
21+
- **🔄 Reliable Model Imports from the Community Platform**: Resolved import failures, allowing seamless integration of community-shared models without errors.
22+
- **📊 OpenAI Usage Statistics Restored**: Fixed an issue where OpenAI usage metrics were not displaying correctly, ensuring accurate tracking of usage data.
23+
- **🗂️ Deduplication for Retrieved Documents**: Documents retrieved during searches are now intelligently deduplicated, meaning no more redundant results—helping to keep information concise and relevant.
24+
25+
### Changed
26+
27+
- **📝 "Full Context Mode" Renamed for Clarity**: The "Full Context Mode" toggle in Web Search settings is now labeled "Bypass Embedding & Retrieval" for consistency across the UI.
28+
829
## [0.5.16] - 2025-02-20
930

1031
### Fixed

backend/open_webui/config.py

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from typing import Generic, Optional, TypeVar
1010
from urllib.parse import urlparse
1111

12-
import chromadb
1312
import requests
1413
from pydantic import BaseModel
1514
from sqlalchemy import JSON, Column, DateTime, Integer, func
@@ -44,7 +43,7 @@ def filter(self, record: logging.LogRecord) -> bool:
4443

4544
# Function to run the alembic migrations
4645
def run_migrations():
47-
print("Running migrations")
46+
log.info("Running migrations")
4847
try:
4948
from alembic import command
5049
from alembic.config import Config
@@ -57,7 +56,7 @@ def run_migrations():
5756

5857
command.upgrade(alembic_cfg, "head")
5958
except Exception as e:
60-
print(f"Error: {e}")
59+
log.exception(f"Error running migrations: {e}")
6160

6261

6362
run_migrations()
@@ -678,6 +677,10 @@ def oidc_oauth_register(client):
678677
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", None)
679678
S3_KEY_PREFIX = os.environ.get("S3_KEY_PREFIX", None)
680679
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", None)
680+
S3_USE_ACCELERATE_ENDPOINT = (
681+
os.environ.get("S3_USE_ACCELERATE_ENDPOINT", "False").lower() == "true"
682+
)
683+
S3_ADDRESSING_STYLE = os.environ.get("S3_ADDRESSING_STYLE", None)
681684

682685
GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME", None)
683686
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get(
@@ -1094,7 +1097,7 @@ class BannerModel(BaseModel):
10941097
banners = json.loads(os.environ.get("WEBUI_BANNERS", "[]"))
10951098
banners = [BannerModel(**banner) for banner in banners]
10961099
except Exception as e:
1097-
print(f"Error loading WEBUI_BANNERS: {e}")
1100+
log.exception(f"Error loading WEBUI_BANNERS: {e}")
10981101
banners = []
10991102

11001103
WEBUI_BANNERS = PersistentConfig("WEBUI_BANNERS", "ui.banners", banners)
@@ -1497,22 +1500,27 @@ class BannerModel(BaseModel):
14971500
VECTOR_DB = os.environ.get("VECTOR_DB", "chroma")
14981501

14991502
# Chroma
1500-
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
1501-
CHROMA_TENANT = os.environ.get("CHROMA_TENANT", chromadb.DEFAULT_TENANT)
1502-
CHROMA_DATABASE = os.environ.get("CHROMA_DATABASE", chromadb.DEFAULT_DATABASE)
1503-
CHROMA_HTTP_HOST = os.environ.get("CHROMA_HTTP_HOST", "")
1504-
CHROMA_HTTP_PORT = int(os.environ.get("CHROMA_HTTP_PORT", "8000"))
1505-
CHROMA_CLIENT_AUTH_PROVIDER = os.environ.get("CHROMA_CLIENT_AUTH_PROVIDER", "")
1506-
CHROMA_CLIENT_AUTH_CREDENTIALS = os.environ.get("CHROMA_CLIENT_AUTH_CREDENTIALS", "")
1507-
# Comma-separated list of header=value pairs
1508-
CHROMA_HTTP_HEADERS = os.environ.get("CHROMA_HTTP_HEADERS", "")
1509-
if CHROMA_HTTP_HEADERS:
1510-
CHROMA_HTTP_HEADERS = dict(
1511-
[pair.split("=") for pair in CHROMA_HTTP_HEADERS.split(",")]
1503+
if VECTOR_DB == "chroma":
1504+
import chromadb
1505+
1506+
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
1507+
CHROMA_TENANT = os.environ.get("CHROMA_TENANT", chromadb.DEFAULT_TENANT)
1508+
CHROMA_DATABASE = os.environ.get("CHROMA_DATABASE", chromadb.DEFAULT_DATABASE)
1509+
CHROMA_HTTP_HOST = os.environ.get("CHROMA_HTTP_HOST", "")
1510+
CHROMA_HTTP_PORT = int(os.environ.get("CHROMA_HTTP_PORT", "8000"))
1511+
CHROMA_CLIENT_AUTH_PROVIDER = os.environ.get("CHROMA_CLIENT_AUTH_PROVIDER", "")
1512+
CHROMA_CLIENT_AUTH_CREDENTIALS = os.environ.get(
1513+
"CHROMA_CLIENT_AUTH_CREDENTIALS", ""
15121514
)
1513-
else:
1514-
CHROMA_HTTP_HEADERS = None
1515-
CHROMA_HTTP_SSL = os.environ.get("CHROMA_HTTP_SSL", "false").lower() == "true"
1515+
# Comma-separated list of header=value pairs
1516+
CHROMA_HTTP_HEADERS = os.environ.get("CHROMA_HTTP_HEADERS", "")
1517+
if CHROMA_HTTP_HEADERS:
1518+
CHROMA_HTTP_HEADERS = dict(
1519+
[pair.split("=") for pair in CHROMA_HTTP_HEADERS.split(",")]
1520+
)
1521+
else:
1522+
CHROMA_HTTP_HEADERS = None
1523+
CHROMA_HTTP_SSL = os.environ.get("CHROMA_HTTP_SSL", "false").lower() == "true"
15161524
# This uses the model defined in the Dockerfile ENV variable. If you don't use Docker or Docker-based deployments such as k8s, the default embedding model will be used (sentence-transformers/all-MiniLM-L6-v2)
15171525

15181526
# Milvus
@@ -1566,6 +1574,18 @@ class BannerModel(BaseModel):
15661574
os.environ.get("GOOGLE_DRIVE_API_KEY", ""),
15671575
)
15681576

1577+
ENABLE_ONEDRIVE_INTEGRATION = PersistentConfig(
1578+
"ENABLE_ONEDRIVE_INTEGRATION",
1579+
"onedrive.enable",
1580+
os.getenv("ENABLE_ONEDRIVE_INTEGRATION", "False").lower() == "true",
1581+
)
1582+
1583+
ONEDRIVE_CLIENT_ID = PersistentConfig(
1584+
"ONEDRIVE_CLIENT_ID",
1585+
"onedrive.client_id",
1586+
os.environ.get("ONEDRIVE_CLIENT_ID", ""),
1587+
)
1588+
15691589
# RAG Content Extraction
15701590
CONTENT_EXTRACTION_ENGINE = PersistentConfig(
15711591
"CONTENT_EXTRACTION_ENGINE",
@@ -1579,6 +1599,26 @@ class BannerModel(BaseModel):
15791599
os.getenv("TIKA_SERVER_URL", "http://tika:9998"), # Default for sidecar deployment
15801600
)
15811601

1602+
DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig(
1603+
"DOCUMENT_INTELLIGENCE_ENDPOINT",
1604+
"rag.document_intelligence_endpoint",
1605+
os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT", ""),
1606+
)
1607+
1608+
DOCUMENT_INTELLIGENCE_KEY = PersistentConfig(
1609+
"DOCUMENT_INTELLIGENCE_KEY",
1610+
"rag.document_intelligence_key",
1611+
os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""),
1612+
)
1613+
1614+
1615+
BYPASS_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
1616+
"BYPASS_EMBEDDING_AND_RETRIEVAL",
1617+
"rag.bypass_embedding_and_retrieval",
1618+
os.environ.get("BYPASS_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
1619+
)
1620+
1621+
15821622
RAG_TOP_K = PersistentConfig(
15831623
"RAG_TOP_K", "rag.top_k", int(os.environ.get("RAG_TOP_K", "3"))
15841624
)
@@ -1795,10 +1835,10 @@ class BannerModel(BaseModel):
17951835
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
17961836
)
17971837

1798-
RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
1799-
"RAG_WEB_SEARCH_FULL_CONTEXT",
1800-
"rag.web.search.full_context",
1801-
os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
1838+
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
1839+
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL",
1840+
"rag.web.search.bypass_embedding_and_retrieval",
1841+
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
18021842
)
18031843

18041844
# You can provide a list of your own websites to filter after performing a web search.

backend/open_webui/env.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,3 +419,25 @@ def parse_section(section):
419419

420420
if OFFLINE_MODE:
421421
os.environ["HF_HUB_OFFLINE"] = "1"
422+
423+
####################################
424+
# AUDIT LOGGING
425+
####################################
426+
ENABLE_AUDIT_LOGS = os.getenv("ENABLE_AUDIT_LOGS", "false").lower() == "true"
427+
# Where to store the audit log file
428+
AUDIT_LOGS_FILE_PATH = f"{DATA_DIR}/audit.log"
429+
# Maximum size of a file before rotating into a new log file
430+
AUDIT_LOG_FILE_ROTATION_SIZE = os.getenv("AUDIT_LOG_FILE_ROTATION_SIZE", "10MB")
431+
# METADATA | REQUEST | REQUEST_RESPONSE
432+
AUDIT_LOG_LEVEL = os.getenv("AUDIT_LOG_LEVEL", "REQUEST_RESPONSE").upper()
433+
try:
434+
MAX_BODY_LOG_SIZE = int(os.environ.get("MAX_BODY_LOG_SIZE") or 2048)
435+
except ValueError:
436+
MAX_BODY_LOG_SIZE = 2048
437+
438+
# Comma-separated list of URL paths to exclude from audit logging
439+
AUDIT_EXCLUDED_PATHS = os.getenv("AUDIT_EXCLUDED_PATHS", "/chats,/chat,/folders").split(
440+
","
441+
)
442+
AUDIT_EXCLUDED_PATHS = [path.strip() for path in AUDIT_EXCLUDED_PATHS]
443+
AUDIT_EXCLUDED_PATHS = [path.lstrip("/") for path in AUDIT_EXCLUDED_PATHS]

backend/open_webui/functions.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import sys
33
import inspect
44
import json
5+
import asyncio
56

67
from pydantic import BaseModel
78
from typing import AsyncGenerator, Generator, Iterator
@@ -76,11 +77,13 @@ async def get_function_models(request):
7677
if hasattr(function_module, "pipes"):
7778
sub_pipes = []
7879

79-
# Check if pipes is a function or a list
80-
80+
# Handle pipes being a list, sync function, or async function
8181
try:
8282
if callable(function_module.pipes):
83-
sub_pipes = function_module.pipes()
83+
if asyncio.iscoroutinefunction(function_module.pipes):
84+
sub_pipes = await function_module.pipes()
85+
else:
86+
sub_pipes = function_module.pipes()
8487
else:
8588
sub_pipes = function_module.pipes
8689
except Exception as e:

backend/open_webui/main.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
from starlette.responses import Response, StreamingResponse
4646

4747

48+
from open_webui.utils import logger
49+
from open_webui.utils.audit import AuditLevel, AuditLoggingMiddleware
50+
from open_webui.utils.logger import start_logger
4851
from open_webui.socket.main import (
4952
app as socket_app,
5053
periodic_usage_pool_cleanup,
@@ -95,6 +98,7 @@
9598
OLLAMA_API_CONFIGS,
9699
# OpenAI
97100
ENABLE_OPENAI_API,
101+
ONEDRIVE_CLIENT_ID,
98102
OPENAI_API_BASE_URLS,
99103
OPENAI_API_KEYS,
100104
OPENAI_API_CONFIGS,
@@ -161,6 +165,7 @@
161165
RAG_TEMPLATE,
162166
DEFAULT_RAG_TEMPLATE,
163167
RAG_FULL_CONTEXT,
168+
BYPASS_EMBEDDING_AND_RETRIEVAL,
164169
RAG_EMBEDDING_MODEL,
165170
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
166171
RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
@@ -180,6 +185,8 @@
180185
CHUNK_SIZE,
181186
CONTENT_EXTRACTION_ENGINE,
182187
TIKA_SERVER_URL,
188+
DOCUMENT_INTELLIGENCE_ENDPOINT,
189+
DOCUMENT_INTELLIGENCE_KEY,
183190
RAG_TOP_K,
184191
RAG_TEXT_SPLITTER,
185192
TIKTOKEN_ENCODING_NAME,
@@ -188,7 +195,7 @@
188195
YOUTUBE_LOADER_PROXY_URL,
189196
# Retrieval (Web Search)
190197
RAG_WEB_SEARCH_ENGINE,
191-
RAG_WEB_SEARCH_FULL_CONTEXT,
198+
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
192199
RAG_WEB_SEARCH_RESULT_COUNT,
193200
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
194201
RAG_WEB_SEARCH_TRUST_ENV,
@@ -215,11 +222,13 @@
215222
GOOGLE_PSE_ENGINE_ID,
216223
GOOGLE_DRIVE_CLIENT_ID,
217224
GOOGLE_DRIVE_API_KEY,
225+
ONEDRIVE_CLIENT_ID,
218226
ENABLE_RAG_HYBRID_SEARCH,
219227
ENABLE_RAG_LOCAL_WEB_FETCH,
220228
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
221229
ENABLE_RAG_WEB_SEARCH,
222230
ENABLE_GOOGLE_DRIVE_INTEGRATION,
231+
ENABLE_ONEDRIVE_INTEGRATION,
223232
UPLOAD_DIR,
224233
# WebUI
225234
WEBUI_AUTH,
@@ -298,8 +307,11 @@
298307
reset_config,
299308
)
300309
from open_webui.env import (
310+
AUDIT_EXCLUDED_PATHS,
311+
AUDIT_LOG_LEVEL,
301312
CHANGELOG,
302313
GLOBAL_LOG_LEVEL,
314+
MAX_BODY_LOG_SIZE,
303315
SAFE_MODE,
304316
SRC_LOG_LEVELS,
305317
VERSION,
@@ -384,6 +396,7 @@ async def get_response(self, path: str, scope):
384396

385397
@asynccontextmanager
386398
async def lifespan(app: FastAPI):
399+
start_logger()
387400
if RESET_CONFIG_ON_START:
388401
reset_config()
389402

@@ -526,13 +539,16 @@ async def lifespan(app: FastAPI):
526539

527540

528541
app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT
542+
app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = BYPASS_EMBEDDING_AND_RETRIEVAL
529543
app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
530544
app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
531545
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
532546
)
533547

534548
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
535549
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
550+
app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
551+
app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
536552

537553
app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER
538554
app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME
@@ -560,10 +576,13 @@ async def lifespan(app: FastAPI):
560576

561577
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
562578
app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
563-
app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
579+
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
580+
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
581+
)
564582
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
565583

566584
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
585+
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION
567586
app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL
568587
app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
569588
app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
@@ -879,6 +898,19 @@ async def inspect_websocket(request: Request, call_next):
879898
app.include_router(utils.router, prefix="/api/v1/utils", tags=["utils"])
880899

881900

901+
try:
902+
audit_level = AuditLevel(AUDIT_LOG_LEVEL)
903+
except ValueError as e:
904+
logger.error(f"Invalid audit level: {AUDIT_LOG_LEVEL}. Error: {e}")
905+
audit_level = AuditLevel.NONE
906+
907+
if audit_level != AuditLevel.NONE:
908+
app.add_middleware(
909+
AuditLoggingMiddleware,
910+
audit_level=audit_level,
911+
excluded_paths=AUDIT_EXCLUDED_PATHS,
912+
max_body_size=MAX_BODY_LOG_SIZE,
913+
)
882914
##################################
883915
#
884916
# Chat Endpoints
@@ -911,7 +943,7 @@ def get_filtered_models(models, user):
911943

912944
return filtered_models
913945

914-
models = await get_all_models(request)
946+
models = await get_all_models(request, user=user)
915947

916948
# Filter out filter pipelines
917949
models = [
@@ -951,7 +983,7 @@ async def chat_completion(
951983
user=Depends(get_verified_user),
952984
):
953985
if not request.app.state.MODELS:
954-
await get_all_models(request)
986+
await get_all_models(request, user=user)
955987

956988
model_item = form_data.pop("model_item", {})
957989
tasks = form_data.pop("background_tasks", None)
@@ -1146,6 +1178,7 @@ async def get_app_config(request: Request):
11461178
"enable_admin_export": ENABLE_ADMIN_EXPORT,
11471179
"enable_admin_chat_access": ENABLE_ADMIN_CHAT_ACCESS,
11481180
"enable_google_drive_integration": app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
1181+
"enable_onedrive_integration": app.state.config.ENABLE_ONEDRIVE_INTEGRATION,
11491182
}
11501183
if user is not None
11511184
else {}
@@ -1177,6 +1210,7 @@ async def get_app_config(request: Request):
11771210
"client_id": GOOGLE_DRIVE_CLIENT_ID.value,
11781211
"api_key": GOOGLE_DRIVE_API_KEY.value,
11791212
},
1213+
"onedrive": {"client_id": ONEDRIVE_CLIENT_ID.value},
11801214
}
11811215
if user is not None
11821216
else {}

0 commit comments

Comments
 (0)