Unsupervisedcom
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
Lines changed: 14 additions & 0 deletions
diff --git a/.github/workflows/format-backend.yaml b/.github/workflows/format-backend.yaml
Lines changed: 12 additions & 2 deletions
diff --git a/.github/workflows/format-build-frontend.yaml b/.github/workflows/format-build-frontend.yaml
Lines changed: 9 additions & 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 40 additions & 0 deletions
diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
Lines changed: 31 additions & 7 deletions
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
Lines changed: 18 additions & 0 deletions
diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py
Lines changed: 27 additions & 11 deletions
@@ -1,12 +1,26 @@
 version: 2
 updates:
+  - package-ecosystem: uv
+    directory: '/'
+    schedule:
+      interval: monthly
+    target-branch: 'dev'
+
   - package-ecosystem: pip
     directory: '/backend'
     schedule:
       interval: monthly
     target-branch: 'dev'
+
+  - package-ecosystem: npm
+    directory: '/'
+    schedule:
+      interval: monthly
+    target-branch: 'dev'
+
   - package-ecosystem: 'github-actions'
     directory: '/'
     schedule:
       # Check for updates to GitHub Actions every month
       interval: monthly
+    target-branch: 'dev'
@@ -5,10 +5,18 @@ on:
     branches:
       - main
       - dev
+    paths:
+      - 'backend/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
   pull_request:
     branches:
       - main
       - dev
+    paths:
+      - 'backend/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
 
 jobs:
   build:
@@ -17,15 +25,17 @@ jobs:
 
     strategy:
       matrix:
-        python-version: [3.11]
+        python-version:
+          - 3.11.x
+          - 3.12.x
 
     steps:
       - uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: '${{ matrix.python-version }}'
 
       - name: Install dependencies
         run: |
 
@@ -5,10 +5,18 @@ on:
     branches:
       - main
       - dev
+    paths-ignore:
+      - 'backend/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
   pull_request:
     branches:
       - main
       - dev
+    paths-ignore:
+      - 'backend/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
 
 jobs:
   build:
@@ -21,7 +29,7 @@ jobs:
       - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: '22' # Or specify any other version you want to use
+          node-version: '22'
 
       - name: Install Dependencies
         run: npm install
 
@@ -5,6 +5,46 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.2] - 2025-04-06
+
+### Added
+
+- 🌍 **Improved Global Language Support**: Expanded and refined translations across multiple languages to enhance clarity and consistency for international users.
+
+### Fixed
+
+- 🛠️ **Accurate Tool Descriptions from OpenAPI Servers**: External tools now use full endpoint descriptions instead of summaries when generating tool specifications—helping AI models understand tool purpose more precisely and choose the right tool more accurately in tool workflows.
+- 🔧 **Precise Web Results Source Attribution**: Fixed a key issue where all web search results showed the same source ID—now each result gets its correct and distinct source, ensuring accurate citations and traceability.
+- 🔍 **Clean Web Search Retrieval**: Web search now retains only results from URLs where real content was successfully fetched—improving accuracy and removing empty or broken links from citations.
+- 🎵 **Audio File Upload Response Restored**: Resolved an issue where uploading audio files did not return valid responses, restoring smooth file handling for transcription and audio-based workflows.
+
+### Changed
+
+- 🧰 **General Backend Refactoring**: Multiple behind-the-scenes improvements streamline backend performance, reduce complexity, and ensure a more stable, maintainable system overall—making everything smoother without changing your workflow.
+
+## [0.6.1] - 2025-04-05
+
+### Added
+
+- 🛠️ **Global Tool Servers Configuration**: Admins can now centrally configure global external tool servers from Admin Settings > Tools, allowing seamless sharing of tool integrations across all users without manual setup per user.
+- 🔐 **Direct Tool Usage Permission for Users**: Introduced a new user-level permission toggle that grants non-admin users access to direct external tools, empowering broader team collaboration while maintaining control.
+- 🧠 **Mistral OCR Content Extraction Support**: Added native support for Mistral OCR as a high-accuracy document loader, drastically improving text extraction from scanned documents in RAG workflows.
+- 🖼️ **Tools Indicator UI Redesign**: Enhanced message input now smartly displays both built-in and external tools via a unified dropdown, making it simpler and more intuitive to activate tools during conversations.
+- 📄 **RAG Prompt Improved and More Coherent**: The default RAG system prompt has been revised to be clearer and more citation-focused—admins can leave the template field empty to use this new gold-standard prompt.
+- 🧰 **Performance & Developer Improvements**: Major internal restructuring of several tool-related components, simplifying styling and merging external/internal handling logic, resulting in better maintainability and performance.
+- 🌍 **Improved Translations**: Updated translations for Tibetan, Polish, Chinese (Simplified & Traditional), Arabic, Russian, Ukrainian, Dutch, Finnish, and French to improve clarity and consistency across the interface.
+
+### Fixed
+
+- 🔑 **External Tool Server API Key Bug Resolved**: Fixed a critical issue where authentication headers were not being sent when calling tools from external OpenAPI tool servers, ensuring full security and smooth tool operations.
+- 🚫 **Conditional Export Button Visibility**: UI now gracefully hides export buttons when there's nothing to export in models, prompts, tools, or functions, improving visual clarity and reducing confusion.
+- 🧪 **Hybrid Search Failure Recovery**: Resolved an edge case in parallel hybrid search where empty or unindexed collections caused backend crashes—these are now cleanly skipped to ensure system stability.
+- 📂 **Admin Folder Deletion Fix**: Addressed an issue where folders created in the admin workspace couldn't be deleted, restoring full organizational flexibility for admins.
+- 🔐 **Improved Generic Error Feedback on Login**: Authentication errors now show simplified, non-revealing messages for privacy and improved UX, especially with federated logins.
+- 📝 **Tool Message with Images Improved**: Enhanced how tool-generated messages with image outputs are shown in chat, making them more readable and consistent with the overall UI design.
+- ⚙️ **Auto-Exclusion for Broken RAG Collections**: Auto-skips document collections that fail to fetch data or return "None", preventing silent errors and streamlining retrieval workflows.
+- 📝 **Docling Text File Handling Fix**: Fixed file parsing inconsistency that broke docling-based RAG functionality for certain plain text files, ensuring wider file compatibility.
+
 ## [0.6.0] - 2025-03-31
 
 ### Added
 
@@ -331,12 +331,14 @@ def __getattr__(self, key):
 # OAuth config
 ####################################
 
+
 ENABLE_OAUTH_SIGNUP = PersistentConfig(
     "ENABLE_OAUTH_SIGNUP",
     "oauth.enable_signup",
     os.environ.get("ENABLE_OAUTH_SIGNUP", "False").lower() == "true",
 )
 
+
 OAUTH_MERGE_ACCOUNTS_BY_EMAIL = PersistentConfig(
     "OAUTH_MERGE_ACCOUNTS_BY_EMAIL",
     "oauth.merge_accounts_by_email",
@@ -466,6 +468,7 @@ def __getattr__(self, key):
     os.environ.get("OAUTH_USERNAME_CLAIM", "name"),
 )
 
+
 OAUTH_PICTURE_CLAIM = PersistentConfig(
     "OAUTH_PICTURE_CLAIM",
     "oauth.oidc.avatar_claim",
@@ -878,6 +881,17 @@ def oidc_oauth_register(client):
     pass
 OPENAI_API_BASE_URL = "https://api.openai.com/v1"
 
+####################################
+# TOOL_SERVERS
+####################################
+
+
+TOOL_SERVER_CONNECTIONS = PersistentConfig(
+    "TOOL_SERVER_CONNECTIONS",
+    "tool_server.connections",
+    [],
+)
+
 ####################################
 # WEBUI
 ####################################
@@ -1034,6 +1048,11 @@ def oidc_oauth_register(client):
     == "true"
 )
 
+USER_PERMISSIONS_FEATURES_DIRECT_TOOL_SERVERS = (
+    os.environ.get("USER_PERMISSIONS_FEATURES_DIRECT_TOOL_SERVERS", "False").lower()
+    == "true"
+)
+
 USER_PERMISSIONS_FEATURES_WEB_SEARCH = (
     os.environ.get("USER_PERMISSIONS_FEATURES_WEB_SEARCH", "True").lower() == "true"
 )
@@ -1071,6 +1090,7 @@ def oidc_oauth_register(client):
         "temporary_enforced": USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED,
     },
     "features": {
+        "direct_tool_servers": USER_PERMISSIONS_FEATURES_DIRECT_TOOL_SERVERS,
         "web_search": USER_PERMISSIONS_FEATURES_WEB_SEARCH,
         "image_generation": USER_PERMISSIONS_FEATURES_IMAGE_GENERATION,
         "code_interpreter": USER_PERMISSIONS_FEATURES_CODE_INTERPRETER,
@@ -1727,6 +1747,11 @@ class BannerModel(BaseModel):
     os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""),
 )
 
+MISTRAL_OCR_API_KEY = PersistentConfig(
+    "MISTRAL_OCR_API_KEY",
+    "rag.mistral_ocr_api_key",
+    os.getenv("MISTRAL_OCR_API_KEY", ""),
+)
 
 BYPASS_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
     "BYPASS_EMBEDDING_AND_RETRIEVAL",
@@ -1875,26 +1900,25 @@ class BannerModel(BaseModel):
 )
 
 DEFAULT_RAG_TEMPLATE = """### Task:
-Respond to the user query using the provided context, incorporating inline citations in the format [source_id] **only when the <source_id> tag is explicitly provided** in the context.
+Respond to the user query using the provided context, incorporating inline citations in the format [id] **only when the <source> tag includes an explicit id attribute** (e.g., <source id="1">).
 
 ### Guidelines:
 - If you don't know the answer, clearly state that.
 - If uncertain, ask the user for clarification.
 - Respond in the same language as the user's query.
 - If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
 - If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
-- **Only include inline citations using [source_id] (e.g., [1], [2]) when a `<source_id>` tag is explicitly provided in the context.**
-- Do not cite if the <source_id> tag is not provided in the context.  
+- **Only include inline citations using [id] (e.g., [1], [2]) when the <source> tag includes an id attribute.**
+- Do not cite if the <source> tag does not contain an id attribute.
 - Do not use XML tags in your response.
 - Ensure citations are concise and directly related to the information provided.
 
 ### Example of Citation:
-If the user asks about a specific topic and the information is found in "whitepaper.pdf" with a provided <source_id>, the response should include the citation like so:  
-* "According to the study, the proposed method increases efficiency by 20% [whitepaper.pdf]."
-If no <source_id> is present, the response should omit the citation.
+If the user asks about a specific topic and the information is found in a source with a provided id attribute, the response should include the citation like in the following example:
+* "According to the study, the proposed method increases efficiency by 20% [1]."
 
 ### Output:
-Provide a clear and direct response to the user's query, including inline citations in the format [source_id] only when the <source_id> tag is present in the context.
+Provide a clear and direct response to the user's query, including inline citations in the format [id] only when the <source> tag with id attribute is present in the context.
 
 <context>
 {{CONTEXT}}
 
@@ -105,6 +105,8 @@
     OPENAI_API_CONFIGS,
     # Direct Connections
     ENABLE_DIRECT_CONNECTIONS,
+    # Tool Server Configs
+    TOOL_SERVER_CONNECTIONS,
     # Code Execution
     ENABLE_CODE_EXECUTION,
     CODE_EXECUTION_ENGINE,
@@ -191,6 +193,7 @@
     DOCLING_SERVER_URL,
     DOCUMENT_INTELLIGENCE_ENDPOINT,
     DOCUMENT_INTELLIGENCE_KEY,
+    MISTRAL_OCR_API_KEY,
     RAG_TOP_K,
     RAG_TOP_K_RERANKER,
     RAG_TEXT_SPLITTER,
@@ -355,6 +358,7 @@
 
 from open_webui.utils.auth import (
     get_license_data,
+    get_http_authorization_cred,
     decode_token,
     get_admin_user,
     get_verified_user,
@@ -477,6 +481,15 @@ async def lifespan(app: FastAPI):
 
 app.state.OPENAI_MODELS = {}
 
+########################################
+#
+# TOOL SERVERS
+#
+########################################
+
+app.state.config.TOOL_SERVER_CONNECTIONS = TOOL_SERVER_CONNECTIONS
+app.state.TOOL_SERVERS = []
+
 ########################################
 #
 # DIRECT CONNECTIONS
@@ -582,6 +595,7 @@ async def lifespan(app: FastAPI):
 app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
 app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
 app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
+app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY
 
 app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER
 app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME
@@ -862,6 +876,10 @@ async def commit_session_after_request(request: Request, call_next):
 @app.middleware("http")
 async def check_url(request: Request, call_next):
     start_time = int(time.time())
+    request.state.token = get_http_authorization_cred(
+        request.headers.get("Authorization")
+    )
+
     request.state.enable_api_key = app.state.config.ENABLE_API_KEY
     response = await call_next(request)
     process_time = int(time.time()) - start_time
 
@@ -20,6 +20,9 @@
     YoutubeLoader,
 )
 from langchain_core.documents import Document
+
+from open_webui.retrieval.loaders.mistral import MistralLoader
+
 from open_webui.env import SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL
 
 logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL)
@@ -181,13 +184,16 @@ def load(
             for doc in docs
         ]
 
+    def _is_text_file(self, file_ext: str, file_content_type: str) -> bool:
+        return file_ext in known_source_ext or (
+            file_content_type and file_content_type.find("text/") >= 0
+        )
+
     def _get_loader(self, filename: str, file_content_type: str, file_path: str):
         file_ext = filename.split(".")[-1].lower()
 
         if self.engine == "tika" and self.kwargs.get("TIKA_SERVER_URL"):
-            if file_ext in known_source_ext or (
-                file_content_type and file_content_type.find("text/") >= 0
-            ):
+            if self._is_text_file(file_ext, file_content_type):
                 loader = TextLoader(file_path, autodetect_encoding=True)
             else:
                 loader = TikaLoader(
@@ -196,11 +202,14 @@ def _get_loader(self, filename: str, file_content_type: str, file_path: str):
                     mime_type=file_content_type,
                 )
         elif self.engine == "docling" and self.kwargs.get("DOCLING_SERVER_URL"):
-            loader = DoclingLoader(
-                url=self.kwargs.get("DOCLING_SERVER_URL"),
-                file_path=file_path,
-                mime_type=file_content_type,
-            )
+            if self._is_text_file(file_ext, file_content_type):
+                loader = TextLoader(file_path, autodetect_encoding=True)
+            else:
+                loader = DoclingLoader(
+                    url=self.kwargs.get("DOCLING_SERVER_URL"),
+                    file_path=file_path,
+                    mime_type=file_content_type,
+                )
         elif (
             self.engine == "document_intelligence"
             and self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT") != ""
@@ -222,6 +231,15 @@ def _get_loader(self, filename: str, file_content_type: str, file_path: str):
                 api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
                 api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"),
             )
+        elif (
+            self.engine == "mistral_ocr"
+            and self.kwargs.get("MISTRAL_OCR_API_KEY") != ""
+            and file_ext
+            in ["pdf"]  # Mistral OCR currently only supports PDF and images
+        ):
+            loader = MistralLoader(
+                api_key=self.kwargs.get("MISTRAL_OCR_API_KEY"), file_path=file_path
+            )
         else:
             if file_ext == "pdf":
                 loader = PyPDFLoader(
@@ -257,9 +275,7 @@ def _get_loader(self, filename: str, file_content_type: str, file_path: str):
                 loader = UnstructuredPowerPointLoader(file_path)
             elif file_ext == "msg":
                 loader = OutlookMessageLoader(file_path)
-            elif file_ext in known_source_ext or (
-                file_content_type and file_content_type.find("text/") >= 0
-            ):
+            elif self._is_text_file(file_ext, file_content_type):
                 loader = TextLoader(file_path, autodetect_encoding=True)
             else:
                 loader = TextLoader(file_path, autodetect_encoding=True)