Skip to content

Commit 987fbdb

Browse files
Move proxy to dedicated service
1 parent 4a86711 commit 987fbdb

File tree

19 files changed: +634 additions, −676 deletions

.github/workflows/docker.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,14 @@ jobs:
5555
ghcr.io/makespacemadrid/litellm-updater-web:latest
5656
ghcr.io/makespacemadrid/litellm-updater-web:${{ steps.version.outputs.version }}
5757
ghcr.io/makespacemadrid/litellm-updater-web:${{ github.sha }}
58+
59+
- name: Build and push proxy image
60+
uses: docker/build-push-action@v6
61+
with:
62+
context: .
63+
file: Dockerfile
64+
push: true
65+
tags: |
66+
ghcr.io/makespacemadrid/litellm-updater-proxy:latest
67+
ghcr.io/makespacemadrid/litellm-updater-proxy:${{ steps.version.outputs.version }}
68+
ghcr.io/makespacemadrid/litellm-updater-proxy:${{ github.sha }}

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ COPY pyproject.toml README.md /app/
1616
COPY litellm_updater /app/litellm_updater
1717
COPY backend /app/backend
1818
COPY frontend /app/frontend
19+
COPY proxy /app/proxy
1920
COPY shared /app/shared
2021
COPY example.env /app/env.example
2122
COPY scripts /app/scripts

backend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Backend sync worker service."""
22

3-
__version__ = "0.5.8"
3+
__version__ = "0.5.21"

backend/litellm_client.py

Lines changed: 73 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,38 @@ async def push_model_to_litellm(
193193
litellm_params = await _build_litellm_params(provider, model, session)
194194

195195
# Build model_info with pricing overrides
196-
model_info = apply_pricing_overrides(
197-
model.effective_params.copy(),
198-
config=config,
199-
provider=provider,
200-
model=model,
201-
)
196+
if provider.type == "compat" and session and model.mapped_provider_id and model.mapped_model_id:
197+
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
198+
199+
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
200+
mapped_model = await get_model_by_provider_and_name(
201+
session, model.mapped_provider_id, model.mapped_model_id
202+
)
203+
if mapped_provider and mapped_model:
204+
model_info = apply_pricing_overrides(
205+
mapped_model.effective_params.copy(),
206+
config=config,
207+
provider=mapped_provider,
208+
model=mapped_model,
209+
)
210+
compat_overrides = model.effective_params.copy()
211+
compat_overrides.pop("tags", None)
212+
compat_overrides.pop("mode", None)
213+
model_info.update(compat_overrides)
214+
else:
215+
model_info = apply_pricing_overrides(
216+
model.effective_params.copy(),
217+
config=config,
218+
provider=provider,
219+
model=model,
220+
)
221+
else:
222+
model_info = apply_pricing_overrides(
223+
model.effective_params.copy(),
224+
config=config,
225+
provider=provider,
226+
model=model,
227+
)
202228

203229
# Copy pricing fields into litellm_params so LiteLLM can bill requests
204230
_merge_pricing_fields(litellm_params, model_info)
@@ -211,28 +237,16 @@ async def push_model_to_litellm(
211237
elif provider.type == "ollama":
212238
model_info["mode"] = ollama_mode
213239
model_info["litellm_provider"] = "openai" if ollama_mode == "openai" else "ollama"
214-
elif provider.type == "completion":
215-
model_info["mode"] = "completion"
216-
if session and model.mapped_provider_id:
217-
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
218-
219-
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
220-
mapped_model = None
221-
if mapped_provider:
222-
mapped_model = await get_model_by_provider_and_name(
223-
session, model.mapped_provider_id, model.mapped_model_id or ""
224-
)
225-
226-
if mapped_provider:
227-
if mapped_provider.type == "openai":
228-
model_info["litellm_provider"] = "openai"
229-
elif mapped_provider.type == "ollama":
230-
mapped_ollama_mode = (
231-
(mapped_model.ollama_mode if mapped_model else None)
232-
or mapped_provider.default_ollama_mode
233-
or "ollama_chat"
234-
)
235-
model_info["litellm_provider"] = "openai" if mapped_ollama_mode == "openai" else "ollama"
240+
elif provider.type == "compat":
241+
compat_mode = _get_compat_mode(model)
242+
model_info.setdefault("mode", "completion" if compat_mode == "completion" else "chat")
243+
if compat_mode == "completion":
244+
model_info["supports_completion"] = True
245+
compat_model = litellm_params.get("model", "")
246+
if compat_model.startswith(("ollama/", "ollama_chat/")):
247+
model_info["litellm_provider"] = "ollama"
248+
elif compat_model:
249+
model_info["litellm_provider"] = "openai"
236250

237251
# Generate tags
238252
from shared.models import ModelMetadata as PydanticModelMetadata
@@ -245,6 +259,15 @@ async def push_model_to_litellm(
245259
provider_tags=provider.tags_list,
246260
mode=ollama_mode if provider.type != "compat" else None
247261
)
262+
if provider.type == "compat":
263+
compat_mode = _get_compat_mode(model)
264+
tags = [t for t in tags if not t.startswith("mode:")]
265+
if compat_mode == "completion":
266+
tags = [t for t in tags if t not in {"capability:chat", "capability:completion"}]
267+
tags.extend(["capability:completion", "mode:completion"])
268+
else:
269+
tags = [t for t in tags if t != "capability:completion"]
270+
tags.append("mode:chat")
248271

249272
litellm_params["tags"] = tags
250273
model_info["tags"] = tags
@@ -368,6 +391,12 @@ async def _needs_update(provider, model, litellm_model, config=None, session=Non
368391
return False
369392

370393

394+
def _get_compat_mode(model) -> str:
395+
"""Return compat mode (chat or completion)."""
396+
mode = (model.user_params_dict or {}).get("mode")
397+
return "completion" if mode == "completion" else "chat"
398+
399+
371400
async def _build_litellm_params(provider, model, session=None) -> dict:
372401
"""Build litellm_params for a model."""
373402
litellm_params = {}
@@ -394,6 +423,9 @@ async def _build_litellm_params(provider, model, session=None) -> dict:
394423
litellm_params["api_base"] = provider.base_url
395424
elif provider.type == "compat":
396425
# Compat models need to resolve their mapping to the actual provider/model
426+
compat_mode = _get_compat_mode(model)
427+
if compat_mode == "completion":
428+
litellm_params["supports_completion"] = True
397429
if not session:
398430
logger.warning("No session provided for compat model, using model_id as-is")
399431
litellm_params["model"] = model_id
@@ -420,76 +452,34 @@ async def _build_litellm_params(provider, model, session=None) -> dict:
420452

421453
# Build params based on mapped provider type
422454
if mapped_provider.type == "openai":
423-
litellm_params["model"] = f"openai/{model.mapped_model_id}"
455+
if compat_mode == "completion":
456+
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
457+
else:
458+
litellm_params["model"] = f"openai/{model.mapped_model_id}"
424459
litellm_params["api_base"] = mapped_provider.base_url
425460
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
426461
elif mapped_provider.type == "ollama":
427462
if mapped_ollama_mode == "openai":
428-
litellm_params["model"] = f"openai/{model.mapped_model_id}"
463+
if compat_mode == "completion":
464+
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
465+
else:
466+
litellm_params["model"] = f"openai/{model.mapped_model_id}"
429467
api_base = mapped_provider.base_url.rstrip("/")
430468
litellm_params["api_base"] = f"{api_base}/v1"
431469
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
432470
elif mapped_ollama_mode == "ollama":
433471
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
434472
litellm_params["api_base"] = mapped_provider.base_url
435473
else:
436-
# Use ollama_chat as the preferred default
437-
litellm_params["model"] = f"ollama_chat/{model.mapped_model_id}"
474+
# Use ollama_chat as the preferred default for chat mode
475+
if compat_mode == "completion":
476+
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
477+
else:
478+
litellm_params["model"] = f"ollama_chat/{model.mapped_model_id}"
438479
litellm_params["api_base"] = mapped_provider.base_url
439480
else:
440481
logger.warning(f"Unsupported mapped provider type {mapped_provider.type} for compat model {model_id}")
441482
litellm_params["model"] = model_id
442-
elif provider.type == "completion":
443-
# Completion models resolve to a source provider/model but use completion routing
444-
litellm_params["supports_completion"] = True
445-
if not session:
446-
logger.warning("No session provided for completion model, using model_id as-is")
447-
litellm_params["model"] = model_id
448-
else:
449-
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
450-
451-
if not model.mapped_provider_id or not model.mapped_model_id:
452-
logger.warning(f"Completion model {model_id} missing mapping, using model_id as-is")
453-
litellm_params["model"] = model_id
454-
else:
455-
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
456-
if not mapped_provider:
457-
logger.warning(
458-
"Mapped provider %s not found for completion model %s",
459-
model.mapped_provider_id,
460-
model_id,
461-
)
462-
litellm_params["model"] = model_id
463-
else:
464-
mapped_model = await get_model_by_provider_and_name(
465-
session, model.mapped_provider_id, model.mapped_model_id
466-
)
467-
mapped_ollama_mode = (
468-
(mapped_model.ollama_mode if mapped_model else None)
469-
or mapped_provider.default_ollama_mode
470-
or "ollama_chat"
471-
)
472-
473-
if mapped_provider.type == "openai":
474-
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
475-
litellm_params["api_base"] = mapped_provider.base_url
476-
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
477-
elif mapped_provider.type == "ollama":
478-
if mapped_ollama_mode == "openai":
479-
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
480-
api_base = mapped_provider.base_url.rstrip("/")
481-
litellm_params["api_base"] = f"{api_base}/v1"
482-
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
483-
else:
484-
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
485-
litellm_params["api_base"] = mapped_provider.base_url
486-
else:
487-
logger.warning(
488-
"Unsupported mapped provider type %s for completion model %s",
489-
mapped_provider.type,
490-
model_id,
491-
)
492-
litellm_params["model"] = model_id
493483

494484
return litellm_params
495485

docker-compose.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ services:
3939
labels:
4040
com.centurylinklabs.watchtower.enable: "true"
4141

42+
# OpenAI-compatible proxy (chat/completions)
43+
model-updater-proxy:
44+
image: ghcr.io/makespacemadrid/litellm-updater-proxy:latest
45+
build:
46+
context: .
47+
dockerfile: Dockerfile
48+
command: uvicorn proxy.api:create_app --factory --host 0.0.0.0 --port 8000
49+
container_name: model-updater-proxy
50+
restart: unless-stopped
51+
ports:
52+
- "${PROXY_PORT:-4002}:8000"
53+
volumes:
54+
- ./data:/app/data
55+
environment:
56+
- LOG_LEVEL=${LOG_LEVEL:-info}
57+
- PROXY_LOG_REQUESTS=${PROXY_LOG_REQUESTS:-false}
58+
- PROXY_LOG_BODY=${PROXY_LOG_BODY:-false}
59+
networks:
60+
- litellm
61+
labels:
62+
com.centurylinklabs.watchtower.enable: "true"
63+
4264
litellm:
4365
image: ghcr.io/berriai/litellm:main-stable
4466
ports:

frontend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Frontend API and UI service."""
22

3-
__version__ = "0.5.8"
3+
__version__ = "0.5.21"

frontend/api.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pathlib import Path
66
from contextlib import asynccontextmanager
77

8-
from fastapi import FastAPI, Request, Depends, Form
8+
from fastapi import FastAPI, Request, Depends, Form, HTTPException
99
from fastapi.staticfiles import StaticFiles
1010
from fastapi.templating import Jinja2Templates
1111
from fastapi.responses import HTMLResponse
@@ -16,7 +16,7 @@
1616

1717
from shared.database import create_engine, init_session_maker, get_session, ensure_minimum_schema
1818
from shared.crud import get_all_providers, get_config, get_provider_by_id
19-
from frontend.routes import providers, models, admin, compat, litellm, completion
19+
from frontend.routes import providers, models, admin, compat, litellm
2020
from backend import provider_sync
2121
from sqlalchemy import select, func
2222
from shared.db_models import Model
@@ -26,6 +26,7 @@
2626
logger = logging.getLogger(__name__)
2727

2828

29+
2930
@asynccontextmanager
3031
async def lifespan(app: FastAPI):
3132
"""Initialize database on startup."""
@@ -70,8 +71,7 @@ def _human_source_type(source_type: str) -> str:
7071
type_names = {
7172
"ollama": "Ollama",
7273
"openai": "OpenAI-compatible",
73-
"compat": "Compat",
74-
"completion": "Completion"
74+
"compat": "Compat"
7575
}
7676
return type_names.get(source_type, source_type)
7777

@@ -82,7 +82,6 @@ def _human_source_type(source_type: str) -> str:
8282
app.include_router(models.router, prefix="/api/models", tags=["models"])
8383
app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
8484
app.include_router(compat.router, prefix="/api/compat", tags=["compat"])
85-
app.include_router(completion.router, prefix="/api/completion", tags=["completion"])
8685
app.include_router(litellm.router, prefix="/litellm", tags=["litellm"])
8786

8887
# HTML Routes
@@ -218,25 +217,6 @@ async def compat_page(request: Request, session = Depends(get_session)):
218217
"config": config_dict
219218
})
220219

221-
@app.get("/completion", response_class=HTMLResponse)
222-
async def completion_page(request: Request, session = Depends(get_session)):
223-
"""Completion models page."""
224-
config = await get_config(session)
225-
config_dict = {
226-
"litellm": {
227-
"configured": bool(config.litellm_base_url),
228-
"base_url": config.litellm_base_url or "",
229-
"api_key": config.litellm_api_key or ""
230-
},
231-
"sync_interval_seconds": config.sync_interval_seconds,
232-
"default_pricing_profile": config.default_pricing_profile,
233-
"default_pricing_override": config.default_pricing_override_dict,
234-
}
235-
return templates.TemplateResponse("completion.html", {
236-
"request": request,
237-
"config": config_dict
238-
})
239-
240220
@app.get("/litellm", response_class=HTMLResponse)
241221
async def litellm_page(request: Request, session = Depends(get_session)):
242222
"""LiteLLM models page."""

0 commit comments