Skip to content

Commit 2e070a2

Browse files
committed
[owl] VLM OCR Documents Parsing (#878)
* VLM OCR Implementation
* Allow user specified VLM model
* Allow VLM OCR toggle and model setting in env
* Update PDF loader configs
* Fix uvicorn version
* Configure Docker daemon
* Support uvicorn 0.40.0
* Mirror gotenberg to ghcr
* Update gunicorn hook
* Remove redundant retry logic
1 parent 1871541 commit 2e070a2

File tree

14 files changed

+661
-47
lines changed

14 files changed

+661
-47
lines changed

.env.example

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -44,6 +44,7 @@ JAMAI_API_BASE=http://localhost:6969/api
4444
# OWL_TEST_LLM_API_BASE=http://localhost:6970/v1
4545
# OWL_S3_ENDPOINT=http://localhost:9000
4646
# OWL_FILE_PROXY_URL=website-url
47+
# OWL_GOTENBERG_URL=http://localhost:3001
4748

4849
# Configuration
4950
OWL_PORT=6969
@@ -54,6 +55,10 @@ PB_MAX_CLIENT_CONN=500
5455
PB_MAX_CLIENT_CONN=80
5556
PG_MAX_CONNECTIONS=100
5657

58+
# VLM OCR Config
59+
OWL_USE_VLM_OCR=true
60+
# OWL_VLM_MODEL_ID=
61+
5762
# Frontend config
5863
HOST=localhost
5964
ORIGIN=http://localhost:4000

.github/workflows/ci.yml

Lines changed: 5 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -145,10 +145,12 @@ jobs:
145145
OWL_STRIPE_WEBHOOK_SECRET_TEST: ${{ secrets.OWL_STRIPE_WEBHOOK_SECRET_TEST }}
146146
OWL_STRIPE_PUBLISHABLE_KEY_TEST: ${{ secrets.OWL_STRIPE_PUBLISHABLE_KEY_TEST }}
147147

148-
- name: Configure Docker timeout
148+
- name: Configure Docker daemon
149149
run: |
150-
mkdir -p ~/.docker
151-
echo '{"max-concurrent-downloads": 3, "max-download-attempts": 5}' > ~/.docker/config.json
150+
DAEMON_JSON=/etc/docker/daemon.json
151+
EXISTING=$(sudo cat "$DAEMON_JSON" 2>/dev/null || echo '{}')
152+
echo "$EXISTING" | jq '. + {"max-concurrent-downloads": 3, "max-download-attempts": 5}' | sudo tee "$DAEMON_JSON"
153+
sudo systemctl restart docker
152154
153155
- name: Launch services
154156
id: launch_services
File renamed without changes.

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1 @@
1+
Always read AGENTS.md and CONVENTIONS-BACKEND.md for guidelines.

docker/compose.base.yml

Lines changed: 15 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -215,6 +215,19 @@ services:
215215
networks:
216216
- jamai
217217

218+
gotenberg:
219+
image: ghcr.io/embeddedllm/gotenberg/gotenberg:8
220+
healthcheck:
221+
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
222+
interval: 30s
223+
timeout: 5s
224+
retries: 5
225+
start_period: 20s
226+
ports:
227+
- "3001:3000"
228+
networks:
229+
- jamai
230+
218231
owl:
219232
build:
220233
context: ..
@@ -254,6 +267,8 @@ services:
254267
condition: service_healthy
255268
docling:
256269
condition: service_healthy
270+
gotenberg:
271+
condition: service_healthy
257272
healthcheck:
258273
test: ["CMD-SHELL", "curl --fail localhost:6969/api/health || exit 1"]
259274
interval: 10s

services/api/pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -32,6 +32,7 @@ source = ["owl"]
3232
relative_files = true
3333
concurrency = ["multiprocessing", "thread", "greenlet"]
3434
parallel = true
35+
sigterm = true
3536

3637
[tool.coverage.paths]
3738
source = ["src", "api/src"]
@@ -130,6 +131,7 @@ dependencies = [
130131
"email-validator~=2.2.0",
131132
"fastapi[standard]~=0.115.0",
132133
"flower~=2.0.0",
134+
"griffe~=1.14.0",
133135
"gunicorn~=22.0.0",
134136
"httpx~=0.27",
135137
"itsdangerous~=2.2.0",
@@ -188,6 +190,7 @@ dependencies = [
188190
"pydantic-extra-types~=2.10.0",
189191
"pydantic-settings~=2.10.0",
190192
"pydantic[email,timezone]~=2.11.0", # 2.12 causes issues with sqlmodel Datetime
193+
"pydantic-ai-slim[retry]~=1.59.0",
191194
"pydub~=0.25.0",
192195
"pyjwt~=2.10.0",
193196
"pylance==0.16.0",
@@ -208,7 +211,7 @@ dependencies = [
208211
"typing_extensions~=4.14.0",
209212
"uuid-utils~=0.9.0",
210213
"uuid7~=0.1.0",
211-
"uvicorn[standard]~=0.28",
214+
"uvicorn[standard]~=0.40.0",
212215
"xmltodict~=0.14.0",
213216
]
214217
dynamic = ["version"]

services/api/src/owl/configs/oss.py

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -43,6 +43,8 @@ class EnvConfig(BaseSettings):
4343
code_executor_endpoint: str = "http://kopi:3000"
4444
docling_url: str = "http://docling:5001"
4545
docling_timeout_sec: Annotated[int, Field(gt=0, le=60 * 60)] = 20 * 60
46+
gotenberg_url: str = "http://gotenberg:3000"
47+
gotenberg_timeout_sec: Annotated[int, Field(gt=0, le=60 * 60)] = 5 * 60
4648
test_llm_api_base: str = "http://test-llm:6970/v1"
4749
# Configs
4850
embed_file_upload_max_bytes: int = 200 * 1024 * 1024 # 200MiB in bytes
@@ -59,7 +61,9 @@ class EnvConfig(BaseSettings):
5961
max_write_batch_size: int = 100
6062
max_file_cache_size: int = 20
6163
# PDF Loader configs
62-
fast_pdf_parsing: bool = True
64+
use_vlm_ocr: bool = True # Enable VLM OCR (otherwise use Docling OCR)
65+
# VLM model ID for OCR, only used when use_vlm_ocr is True.
66+
vlm_model_id: str = "openai/gpt-4o-mini"
6367
# LLM configs
6468
llm_timeout_sec: Annotated[int, Field(gt=0, le=60 * 60)] = 60
6569
embed_timeout_sec: Annotated[int, Field(gt=0, le=60 * 60)] = 60

services/api/src/owl/db/gen_executor.py

Lines changed: 26 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -1361,7 +1361,12 @@ async def _load_files(self, message: ChatThreadEntry) -> ChatEntry:
13611361
if isinstance(c, TextContent):
13621362
contents.append(c)
13631363
else:
1364-
data = await _load_uri_as_base64(c.uri)
1364+
data = await _load_uri_as_base64(
1365+
c.uri,
1366+
request=self.request,
1367+
organization=self.organization,
1368+
lm_engine=self.lm,
1369+
)
13651370
if getattr(self._col_map.get(c.column_name, None), "is_document_column", False):
13661371
# Document (data could be None)
13671372
replacements[c.column_name] = str(data)
@@ -1603,12 +1608,20 @@ def _resolve_image_output_type(image_bytes: bytes, mime_type: str | None) -> tup
16031608
return mime or "image/png", extension
16041609

16051610

1606-
async def _load_uri_as_base64(uri: str | None) -> str | AudioContent | ImageContent | None:
1611+
async def _load_uri_as_base64(
1612+
uri: str | None,
1613+
request: Request | None = None,
1614+
organization: OrganizationRead | None = None,
1615+
lm_engine: LMEngine | None = None,
1616+
) -> str | AudioContent | ImageContent | None:
16071617
"""
16081618
Loads a file from URI for LLM inference.
16091619
16101620
Args:
16111621
uri (str | None): The URI of the file.
1622+
request (Request | None): FastAPI request object for VLM OCR.
1623+
organization (OrganizationRead | None): Organization for VLM OCR.
1624+
lm_engine (LMEngine | None): LMEngine instance for VLM OCR.
16121625
16131626
Returns:
16141627
content (str | AudioContent | ImageContent): The file content.
@@ -1635,7 +1648,17 @@ async def _load_uri_as_base64(uri: str | None) -> str | AudioContent | ImageCont
16351648
try:
16361649
# Load as document
16371650
if extension in DOCUMENT_FILE_EXTENSIONS:
1638-
return await GeneralDocLoader().load_document(basename(uri), file_binary)
1651+
try:
1652+
return await GeneralDocLoader(
1653+
request_id=request.state.id,
1654+
lm_engine=lm_engine,
1655+
).load_document(basename(uri), file_binary)
1656+
except Exception as e:
1657+
logger.warning(
1658+
f'Failed to use VLM OCR for "{uri}": {repr(e)}, falling back to Docling.'
1659+
)
1660+
# Retry using Docling OCR as fallback
1661+
return await GeneralDocLoader().load_document(basename(uri), file_binary)
16391662
# Load as audio or image
16401663
else:
16411664
base64_data = base64.b64encode(file_binary).decode("utf-8")

0 commit comments

Comments (0)