fix(gateway): pricing lookup uses wrong separator, cost always NULL (#827)

sarmientoF · claude · njbrake · web-flow · commit b86ff1bd6999 · 2026-02-23T08:03:28.000-05:00
## Description

`_log_usage` in `chat.py` builds the pricing `model_key` using colon
separator (`provider:model`), but `pricing_init.py` stores keys using
slash separator (`provider/model`). This mismatch means the DB lookup
never finds a match, so `cost` is always `NULL` and user spend is never
updated for chat completions.

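The fix makes `provider:model` the canonical key format. `pricing_init.py`
and the pricing API (`set_pricing`) now normalize incoming keys, including
legacy `provider/model` keys, to colon format before storing them.
`_log_usage` looks up `provider:model` first, then falls back to
`provider/model` for backwards compatibility with pricing rows that were
already stored in slash format (the convention previously written by
`pricing_init.py` and, per the original report, also used by `audio.py`
and `search.py`).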

### Why the existing tests don't catch this

The `model_pricing` fixture in `conftest.py` creates pricing with
`model_key = "gemini:gemini-2.5-flash"` (colon format), which happens to
match the old lookup. In production, pricing seeded via
`pricing_init.py` uses slash format — the two never match.

## PR Type
- [x] Bug Fix

## Relevant Issues
N/A

## Checklist
- [x] I have read and understand the existing codebase and relevant
files
- [x] I have tested these changes locally
- [x] Tests pass with my changes
- [ ] I have updated documentation (if applicable)
- [x] I have read the [Contributing Guidelines](CONTRIBUTING.md)
- [x] I have checked my code follows the project's code style
- [ ] I am an AI Agent filling out this form (check box if true)

## AI Disclosure
- **AI Model used**: Claude Opus 4.6
- **AI Developer Tool used**: Claude Code
- **Additional context**: Bug was discovered during production debugging
(cost column showing NULL for all chat completions). The fix and PR were
pair-programmed with Claude Code.

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: njbrake <njbrake@gmail.com>
Co-authored-by: Nathan Brake <33383515+njbrake@users.noreply.github.com>
diff --git a/src/any_llm/gateway/pricing_init.py b/src/any_llm/gateway/pricing_init.py
@@ -28,8 +28,9 @@ def initialize_pricing_from_config(config: GatewayConfig, db: Session) -> None:
 
     logger.info(f"Loading pricing configuration for {len(config.pricing)} model(s)")
 
-    for model_key, pricing_config in config.pricing.items():
-        provider, _ = AnyLLM.split_model_provider(model_key)
+    for raw_model_key, pricing_config in config.pricing.items():
+        provider, model_name = AnyLLM.split_model_provider(raw_model_key)
+        model_key = f"{provider.value}:{model_name}"
 
         if provider.value not in config.providers:
             msg = (
diff --git a/src/any_llm/gateway/routes/chat.py b/src/any_llm/gateway/routes/chat.py
@@ -122,7 +122,10 @@ async def _log_usage(
         usage_log.total_tokens = usage_data.total_tokens
 
         model_key = f"{provider}:{model}" if provider else model
+        model_key_legacy = f"{provider}/{model}" if provider else None
         pricing = db.query(ModelPricing).filter(ModelPricing.model_key == model_key).first()
+        if not pricing and model_key_legacy:
+            pricing = db.query(ModelPricing).filter(ModelPricing.model_key == model_key_legacy).first()
 
         if pricing:
             cost = (usage_data.prompt_tokens / 1_000_000) * pricing.input_price_per_million + (
@@ -135,7 +138,8 @@ async def _log_usage(
                 if user:
                     user.spend = float(user.spend) + cost
         else:
-            logger.warning(f"No pricing configured for model '{model_key}'. Usage will be tracked without cost.")
+            attempted = f"'{model_key}'" + (f" or '{model_key_legacy}'" if model_key_legacy else "")
+            logger.warning(f"No pricing configured for {attempted}. Usage will be tracked without cost.")
 
     db.add(usage_log)
     try:
diff --git a/src/any_llm/gateway/routes/pricing.py b/src/any_llm/gateway/routes/pricing.py
@@ -4,6 +4,7 @@
 from pydantic import BaseModel, Field
 from sqlalchemy.orm import Session
 
+from any_llm.any_llm import AnyLLM
 from any_llm.gateway.auth import verify_master_key
 from any_llm.gateway.db import ModelPricing, get_db
 
@@ -34,14 +35,16 @@ async def set_pricing(
     db: Annotated[Session, Depends(get_db)],
 ) -> PricingResponse:
     """Set or update pricing for a model."""
-    pricing = db.query(ModelPricing).filter(ModelPricing.model_key == request.model_key).first()
+    provider, model_name = AnyLLM.split_model_provider(request.model_key)
+    normalized_key = f"{provider.value}:{model_name}"
+    pricing = db.query(ModelPricing).filter(ModelPricing.model_key == normalized_key).first()
 
     if pricing:
         pricing.input_price_per_million = request.input_price_per_million
         pricing.output_price_per_million = request.output_price_per_million
     else:
         pricing = ModelPricing(
-            model_key=request.model_key,
+            model_key=normalized_key,
             input_price_per_million=request.input_price_per_million,
             output_price_per_million=request.output_price_per_million,
         )
diff --git a/tests/gateway/test_pricing_config.py b/tests/gateway/test_pricing_config.py
@@ -8,7 +8,10 @@
 
 from any_llm.gateway.config import GatewayConfig, PricingConfig
 from any_llm.gateway.db import ModelPricing, get_db
+from any_llm.gateway.db.models import UsageLog
+from any_llm.gateway.routes.chat import _log_usage
 from any_llm.gateway.server import create_app
+from any_llm.types.completion import CompletionUsage
 
 
 def test_pricing_loaded_from_config(postgres_url: str, test_db: Session) -> None:
@@ -116,6 +119,59 @@ def test_pricing_validation_requires_configured_provider(postgres_url: str, test
         create_app(config)
 
 
+def test_pricing_loaded_from_config_normalizes_legacy_slash_format(postgres_url: str, test_db: Session) -> None:
+    """Test that pricing configured with legacy slash format is normalized to colon format."""
+    config = GatewayConfig(
+        database_url=postgres_url,
+        master_key="test-master-key",
+        host="127.0.0.1",
+        port=8000,
+        providers={"openai": {"api_key": "test-key"}},
+        pricing={
+            "openai/gpt-4": PricingConfig(
+                input_price_per_million=30.0,
+                output_price_per_million=60.0,
+            ),
+        },
+    )
+
+    app = create_app(config)
+
+    def override_get_db() -> Any:
+        yield test_db
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with TestClient(app):
+        # Pricing should be stored with canonical colon format, not slash
+        pricing_slash = test_db.query(ModelPricing).filter(ModelPricing.model_key == "openai/gpt-4").first()
+        assert pricing_slash is None, "Pricing should not be stored with legacy slash format"
+
+        pricing_colon = test_db.query(ModelPricing).filter(ModelPricing.model_key == "openai:gpt-4").first()
+        assert pricing_colon is not None, "Pricing should be stored with canonical colon format"
+        assert pricing_colon.input_price_per_million == 30.0
+        assert pricing_colon.output_price_per_million == 60.0
+
+
+def test_set_pricing_api_normalizes_legacy_slash_format(
+    client: TestClient,
+    master_key_header: dict[str, str],
+) -> None:
+    """Test that the pricing API normalizes legacy slash format to colon format."""
+    response = client.post(
+        "/v1/pricing",
+        json={
+            "model_key": "gemini/gemini-2.5-flash",
+            "input_price_per_million": 0.075,
+            "output_price_per_million": 0.30,
+        },
+        headers=master_key_header,
+    )
+    assert response.status_code == 200
+    data = response.json()
+    assert data["model_key"] == "gemini:gemini-2.5-flash", "API should normalize slash to colon format"
+
+
 def test_pricing_initialization_with_no_config(postgres_url: str, test_db: Session) -> None:
     """Test that app starts successfully when no pricing is configured."""
     config = GatewayConfig(
@@ -139,3 +195,62 @@ def override_get_db() -> Any:
         # No pricing should be in database
         pricing_count = test_db.query(ModelPricing).count()
         assert pricing_count == 0, "No pricing should be loaded when config is empty"
+
+
+@pytest.mark.asyncio
+async def test_log_usage_finds_pricing_with_legacy_slash_format(test_db: Session) -> None:
+    """Test that _log_usage falls back to legacy slash format when colon format is not found."""
+    # Simulate pricing stored with legacy slash format (e.g., from before normalization fix)
+    legacy_pricing = ModelPricing(
+        model_key="openai/gpt-4",
+        input_price_per_million=30.0,
+        output_price_per_million=60.0,
+    )
+    test_db.add(legacy_pricing)
+    test_db.commit()
+
+    usage = CompletionUsage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    await _log_usage(
+        db=test_db,
+        api_key_obj=None,
+        model="gpt-4",
+        provider="openai",
+        endpoint="/v1/chat/completions",
+        usage_override=usage,
+    )
+
+    log = test_db.query(UsageLog).first()
+    assert log is not None
+    assert log.cost is not None, "Cost should be calculated via legacy slash format fallback"
+    expected_cost = (1000 / 1_000_000) * 30.0 + (500 / 1_000_000) * 60.0
+    assert abs(log.cost - expected_cost) < 0.0001
+
+
+@pytest.mark.asyncio
+async def test_log_usage_finds_pricing_with_colon_format(test_db: Session) -> None:
+    """Test that _log_usage finds pricing with canonical colon format."""
+    pricing = ModelPricing(
+        model_key="openai:gpt-4",
+        input_price_per_million=30.0,
+        output_price_per_million=60.0,
+    )
+    test_db.add(pricing)
+    test_db.commit()
+
+    usage = CompletionUsage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    await _log_usage(
+        db=test_db,
+        api_key_obj=None,
+        model="gpt-4",
+        provider="openai",
+        endpoint="/v1/chat/completions",
+        usage_override=usage,
+    )
+
+    log = test_db.query(UsageLog).first()
+    assert log is not None
+    assert log.cost is not None, "Cost should be calculated with canonical colon format"
+    expected_cost = (1000 / 1_000_000) * 30.0 + (500 / 1_000_000) * 60.0
+    assert abs(log.cost - expected_cost) < 0.0001