Skip to content

Commit 987fbdb

Browse files
Move proxy to dedicated service
1 parent 4a86711 commit 987fbdb

File tree

19 files changed: +634 additions, −676 deletions

.github/workflows/docker.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,14 @@ jobs:
5555
ghcr.io/makespacemadrid/litellm-updater-web:latest
5656
ghcr.io/makespacemadrid/litellm-updater-web:${{ steps.version.outputs.version }}
5757
ghcr.io/makespacemadrid/litellm-updater-web:${{ github.sha }}
58+
59+
- name: Build and push proxy image
60+
uses: docker/build-push-action@v6
61+
with:
62+
context: .
63+
file: Dockerfile
64+
push: true
65+
tags: |
66+
ghcr.io/makespacemadrid/litellm-updater-proxy:latest
67+
ghcr.io/makespacemadrid/litellm-updater-proxy:${{ steps.version.outputs.version }}
68+
ghcr.io/makespacemadrid/litellm-updater-proxy:${{ github.sha }}

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ COPY pyproject.toml README.md /app/
1616
COPY litellm_updater /app/litellm_updater
1717
COPY backend /app/backend
1818
COPY frontend /app/frontend
19+
COPY proxy /app/proxy
1920
COPY shared /app/shared
2021
COPY example.env /app/env.example
2122
COPY scripts /app/scripts

backend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Backend sync worker service."""
22

3-
__version__ = "0.5.8"
3+
__version__ = "0.5.21"

backend/litellm_client.py

Lines changed: 73 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,38 @@ async def push_model_to_litellm(
193193
litellm_params = await _build_litellm_params(provider, model, session)
194194

195195
# Build model_info with pricing overrides
196-
model_info = apply_pricing_overrides(
197-
model.effective_params.copy(),
198-
config=config,
199-
provider=provider,
200-
model=model,
201-
)
196+
if provider.type == "compat" and session and model.mapped_provider_id and model.mapped_model_id:
197+
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
198+
199+
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
200+
mapped_model = await get_model_by_provider_and_name(
201+
session, model.mapped_provider_id, model.mapped_model_id
202+
)
203+
if mapped_provider and mapped_model:
204+
model_info = apply_pricing_overrides(
205+
mapped_model.effective_params.copy(),
206+
config=config,
207+
provider=mapped_provider,
208+
model=mapped_model,
209+
)
210+
compat_overrides = model.effective_params.copy()
211+
compat_overrides.pop("tags", None)
212+
compat_overrides.pop("mode", None)
213+
model_info.update(compat_overrides)
214+
else:
215+
model_info = apply_pricing_overrides(
216+
model.effective_params.copy(),
217+
config=config,
218+
provider=provider,
219+
model=model,
220+
)
221+
else:
222+
model_info = apply_pricing_overrides(
223+
model.effective_params.copy(),
224+
config=config,
225+
provider=provider,
226+
model=model,
227+
)
202228

203229
# Copy pricing fields into litellm_params so LiteLLM can bill requests
204230
_merge_pricing_fields(litellm_params, model_info)
@@ -211,28 +237,16 @@ async def push_model_to_litellm(
211237
elif provider.type == "ollama":
212238
model_info["mode"] = ollama_mode
213239
model_info["litellm_provider"] = "openai" if ollama_mode == "openai" else "ollama"
214-
elif provider.type == "completion":
215-
model_info["mode"] = "completion"
216-
if session and model.mapped_provider_id:
217-
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
218-
219-
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
220-
mapped_model = None
221-
if mapped_provider:
222-
mapped_model = await get_model_by_provider_and_name(
223-
session, model.mapped_provider_id, model.mapped_model_id or ""
224-
)
225-
226-
if mapped_provider:
227-
if mapped_provider.type == "openai":
228-
model_info["litellm_provider"] = "openai"
229-
elif mapped_provider.type == "ollama":
230-
mapped_ollama_mode = (
231-
(mapped_model.ollama_mode if mapped_model else None)
232-
or mapped_provider.default_ollama_mode
233-
or "ollama_chat"
234-
)
235-
model_info["litellm_provider"] = "openai" if mapped_ollama_mode == "openai" else "ollama"
240+
elif provider.type == "compat":
241+
compat_mode = _get_compat_mode(model)
242+
model_info.setdefault("mode", "completion" if compat_mode == "completion" else "chat")
243+
if compat_mode == "completion":
244+
model_info["supports_completion"] = True
245+
compat_model = litellm_params.get("model", "")
246+
if compat_model.startswith(("ollama/", "ollama_chat/")):
247+
model_info["litellm_provider"] = "ollama"
248+
elif compat_model:
249+
model_info["litellm_provider"] = "openai"
236250

237251
# Generate tags
238252
from shared.models import ModelMetadata as PydanticModelMetadata
@@ -245,6 +259,15 @@ async def push_model_to_litellm(
245259
provider_tags=provider.tags_list,
246260
mode=ollama_mode if provider.type != "compat" else None
247261
)
262+
if provider.type == "compat":
263+
compat_mode = _get_compat_mode(model)
264+
tags = [t for t in tags if not t.startswith("mode:")]
265+
if compat_mode == "completion":
266+
tags = [t for t in tags if t not in {"capability:chat", "capability:completion"}]
267+
tags.extend(["capability:completion", "mode:completion"])
268+
else:
269+
tags = [t for t in tags if t != "capability:completion"]
270+
tags.append("mode:chat")
248271

249272
litellm_params["tags"] = tags
250273
model_info["tags"] = tags
@@ -368,6 +391,12 @@ async def _needs_update(provider, model, litellm_model, config=None, session=Non
368391
return False
369392

370393

394+
def _get_compat_mode(model) -> str:
395+
"""Return compat mode (chat or completion)."""
396+
mode = (model.user_params_dict or {}).get("mode")
397+
return "completion" if mode == "completion" else "chat"
398+
399+
371400
async def _build_litellm_params(provider, model, session=None) -> dict:
372401
"""Build litellm_params for a model."""
373402
litellm_params = {}
@@ -394,6 +423,9 @@ async def _build_litellm_params(provider, model, session=None) -> dict:
394423
litellm_params["api_base"] = provider.base_url
395424
elif provider.type == "compat":
396425
# Compat models need to resolve their mapping to the actual provider/model
426+
compat_mode = _get_compat_mode(model)
427+
if compat_mode == "completion":
428+
litellm_params["supports_completion"] = True
397429
if not session:
398430
logger.warning("No session provided for compat model, using model_id as-is")
399431
litellm_params["model"] = model_id
@@ -420,76 +452,34 @@ async def _build_litellm_params(provider, model, session=None) -> dict:
420452

421453
# Build params based on mapped provider type
422454
if mapped_provider.type == "openai":
423-
litellm_params["model"] = f"openai/{model.mapped_model_id}"
455+
if compat_mode == "completion":
456+
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
457+
else:
458+
litellm_params["model"] = f"openai/{model.mapped_model_id}"
424459
litellm_params["api_base"] = mapped_provider.base_url
425460
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
426461
elif mapped_provider.type == "ollama":
427462
if mapped_ollama_mode == "openai":
428-
litellm_params["model"] = f"openai/{model.mapped_model_id}"
463+
if compat_mode == "completion":
464+
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
465+
else:
466+
litellm_params["model"] = f"openai/{model.mapped_model_id}"
429467
api_base = mapped_provider.base_url.rstrip("/")
430468
litellm_params["api_base"] = f"{api_base}/v1"
431469
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
432470
elif mapped_ollama_mode == "ollama":
433471
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
434472
litellm_params["api_base"] = mapped_provider.base_url
435473
else:
436-
# Use ollama_chat as the preferred default
437-
litellm_params["model"] = f"ollama_chat/{model.mapped_model_id}"
474+
# Use ollama_chat as the preferred default for chat mode
475+
if compat_mode == "completion":
476+
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
477+
else:
478+
litellm_params["model"] = f"ollama_chat/{model.mapped_model_id}"
438479
litellm_params["api_base"] = mapped_provider.base_url
439480
else:
440481
logger.warning(f"Unsupported mapped provider type {mapped_provider.type} for compat model {model_id}")
441482
litellm_params["model"] = model_id
442-
elif provider.type == "completion":
443-
# Completion models resolve to a source provider/model but use completion routing
444-
litellm_params["supports_completion"] = True
445-
if not session:
446-
logger.warning("No session provided for completion model, using model_id as-is")
447-
litellm_params["model"] = model_id
448-
else:
449-
from shared.crud import get_provider_by_id, get_model_by_provider_and_name
450-
451-
if not model.mapped_provider_id or not model.mapped_model_id:
452-
logger.warning(f"Completion model {model_id} missing mapping, using model_id as-is")
453-
litellm_params["model"] = model_id
454-
else:
455-
mapped_provider = await get_provider_by_id(session, model.mapped_provider_id)
456-
if not mapped_provider:
457-
logger.warning(
458-
"Mapped provider %s not found for completion model %s",
459-
model.mapped_provider_id,
460-
model_id,
461-
)
462-
litellm_params["model"] = model_id
463-
else:
464-
mapped_model = await get_model_by_provider_and_name(
465-
session, model.mapped_provider_id, model.mapped_model_id
466-
)
467-
mapped_ollama_mode = (
468-
(mapped_model.ollama_mode if mapped_model else None)
469-
or mapped_provider.default_ollama_mode
470-
or "ollama_chat"
471-
)
472-
473-
if mapped_provider.type == "openai":
474-
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
475-
litellm_params["api_base"] = mapped_provider.base_url
476-
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
477-
elif mapped_provider.type == "ollama":
478-
if mapped_ollama_mode == "openai":
479-
litellm_params["model"] = f"text-completion-openai/{model.mapped_model_id}"
480-
api_base = mapped_provider.base_url.rstrip("/")
481-
litellm_params["api_base"] = f"{api_base}/v1"
482-
litellm_params["api_key"] = mapped_provider.api_key or "sk-1234"
483-
else:
484-
litellm_params["model"] = f"ollama/{model.mapped_model_id}"
485-
litellm_params["api_base"] = mapped_provider.base_url
486-
else:
487-
logger.warning(
488-
"Unsupported mapped provider type %s for completion model %s",
489-
mapped_provider.type,
490-
model_id,
491-
)
492-
litellm_params["model"] = model_id
493483

494484
return litellm_params
495485

docker-compose.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ services:
3939
labels:
4040
com.centurylinklabs.watchtower.enable: "true"
4141

42+
# OpenAI-compatible proxy (chat/completions)
43+
model-updater-proxy:
44+
image: ghcr.io/makespacemadrid/litellm-updater-proxy:latest
45+
build:
46+
context: .
47+
dockerfile: Dockerfile
48+
command: uvicorn proxy.api:create_app --factory --host 0.0.0.0 --port 8000
49+
container_name: model-updater-proxy
50+
restart: unless-stopped
51+
ports:
52+
- "${PROXY_PORT:-4002}:8000"
53+
volumes:
54+
- ./data:/app/data
55+
environment:
56+
- LOG_LEVEL=${LOG_LEVEL:-info}
57+
- PROXY_LOG_REQUESTS=${PROXY_LOG_REQUESTS:-false}
58+
- PROXY_LOG_BODY=${PROXY_LOG_BODY:-false}
59+
networks:
60+
- litellm
61+
labels:
62+
com.centurylinklabs.watchtower.enable: "true"
63+
4264
litellm:
4365
image: ghcr.io/berriai/litellm:main-stable
4466
ports:

frontend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Frontend API and UI service."""
22

3-
__version__ = "0.5.8"
3+
__version__ = "0.5.21"

frontend/api.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pathlib import Path
66
from contextlib import asynccontextmanager
77

8-
from fastapi import FastAPI, Request, Depends, Form
8+
from fastapi import FastAPI, Request, Depends, Form, HTTPException
99
from fastapi.staticfiles import StaticFiles
1010
from fastapi.templating import Jinja2Templates
1111
from fastapi.responses import HTMLResponse
@@ -16,7 +16,7 @@
1616

1717
from shared.database import create_engine, init_session_maker, get_session, ensure_minimum_schema
1818
from shared.crud import get_all_providers, get_config, get_provider_by_id
19-
from frontend.routes import providers, models, admin, compat, litellm, completion
19+
from frontend.routes import providers, models, admin, compat, litellm
2020
from backend import provider_sync
2121
from sqlalchemy import select, func
2222
from shared.db_models import Model
@@ -26,6 +26,7 @@
2626
logger = logging.getLogger(__name__)
2727

2828

29+
2930
@asynccontextmanager
3031
async def lifespan(app: FastAPI):
3132
"""Initialize database on startup."""
@@ -70,8 +71,7 @@ def _human_source_type(source_type: str) -> str:
7071
type_names = {
7172
"ollama": "Ollama",
7273
"openai": "OpenAI-compatible",
73-
"compat": "Compat",
74-
"completion": "Completion"
74+
"compat": "Compat"
7575
}
7676
return type_names.get(source_type, source_type)
7777

@@ -82,7 +82,6 @@ def _human_source_type(source_type: str) -> str:
8282
app.include_router(models.router, prefix="/api/models", tags=["models"])
8383
app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
8484
app.include_router(compat.router, prefix="/api/compat", tags=["compat"])
85-
app.include_router(completion.router, prefix="/api/completion", tags=["completion"])
8685
app.include_router(litellm.router, prefix="/litellm", tags=["litellm"])
8786

8887
# HTML Routes
@@ -218,25 +217,6 @@ async def compat_page(request: Request, session = Depends(get_session)):
218217
"config": config_dict
219218
})
220219

221-
@app.get("/completion", response_class=HTMLResponse)
222-
async def completion_page(request: Request, session = Depends(get_session)):
223-
"""Completion models page."""
224-
config = await get_config(session)
225-
config_dict = {
226-
"litellm": {
227-
"configured": bool(config.litellm_base_url),
228-
"base_url": config.litellm_base_url or "",
229-
"api_key": config.litellm_api_key or ""
230-
},
231-
"sync_interval_seconds": config.sync_interval_seconds,
232-
"default_pricing_profile": config.default_pricing_profile,
233-
"default_pricing_override": config.default_pricing_override_dict,
234-
}
235-
return templates.TemplateResponse("completion.html", {
236-
"request": request,
237-
"config": config_dict
238-
})
239-
240220
@app.get("/litellm", response_class=HTMLResponse)
241221
async def litellm_page(request: Request, session = Depends(get_session)):
242222
"""LiteLLM models page."""

0 commit comments