Skip to content
12 changes: 7 additions & 5 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import json

MODEL_RETRIES = 2
DUMMY_LITELLM_API_KEY = "dummy_key" # placeholder set when no OpenAI key is configured


class LiteLLMAIHandler(BaseAiHandler):
Expand All @@ -39,7 +40,7 @@ def __init__(self):
openai.api_key = get_settings().openai.key
litellm.openai_key = get_settings().openai.key
elif 'OPENAI_API_KEY' not in os.environ:
litellm.api_key = "dummy_key"
litellm.api_key = DUMMY_LITELLM_API_KEY
if get_settings().get("aws.AWS_ACCESS_KEY_ID"):
assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, "AWS credentials are incomplete"
os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID
Expand Down Expand Up @@ -405,11 +406,12 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system}")
get_logger().info(f"\nUser prompt:\n{user}")

# Support for Ollama Cloud
if model.startswith('ollama') and get_settings().get("OLLAMA.API_KEY", None):

# Inject api_key to the call. This key is populated during init by providers
# like Groq, XAI, Azure AD, and OpenRouter. Skip if None or placeholder.
if litellm.api_key and litellm.api_key != DUMMY_LITELLM_API_KEY:
kwargs["api_key"] = litellm.api_key

# Get completion with automatic streaming detection
resp, finish_reason, response_obj = await self._get_completion(**kwargs)

Expand Down
333 changes: 333 additions & 0 deletions tests/unittest/test_litellm_api_key_guard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
"""
Tests for the litellm.api_key guard in LiteLLMAIHandler.chat_completion.

Verifies:
- Placeholder key (DUMMY_LITELLM_API_KEY) is never injected into the call.
- None is not injected (e.g. when OpenAI key is set via litellm.openai_key).
- Real provider keys (Groq, XAI, OpenRouter, Azure AD) ARE injected.
"""
from unittest.mock import AsyncMock, MagicMock, patch

import litellm
import pytest

import pr_agent.algo.ai_handlers.litellm_ai_handler as litellm_handler
from pr_agent.algo.ai_handlers.litellm_ai_handler import DUMMY_LITELLM_API_KEY, LiteLLMAIHandler


def _make_settings():
"""Minimal settings object that satisfies __init__ and chat_completion."""
return type("Settings", (), {
"config": type("Config", (), {
"reasoning_effort": None,
"ai_timeout": 30,
"custom_reasoning_model": False,
"max_model_tokens": 32000,
"verbosity_level": 0,
"seed": -1,
"get": lambda self, key, default=None: default,
})(),
"litellm": type("LiteLLM", (), {
"get": lambda self, key, default=None: default,
})(),
"get": lambda self, key, default=None: default,
})()


def _mock_response():
"""Minimal acompletion response."""
mock = MagicMock()
mock.__getitem__ = lambda self, key: {
"choices": [{"message": {"content": "ok"}, "finish_reason": "stop"}]
}[key]
mock.dict.return_value = {"choices": [{"message": {"content": "ok"}, "finish_reason": "stop"}]}
return mock
Comment on lines +37 to +44
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Broken litellm response mock 🐞 Bug ☼ Reliability

The new unit tests’ _mock_response() assigns a lambda to mock.__getitem__, but
_get_completion() requires response["choices"] to return a real list and checks
len(response["choices"]); this mock setup is likely not honored by response[...] and can cause
openai.APIError during the tests. As written, the tests are unstable and may not actually validate
the intended behavior.
Agent Prompt
### Issue description
The tests’ `_mock_response()` does not reliably satisfy the production access pattern `response["choices"]` + `len(response["choices"])` in `_get_completion()`. This can cause the tests to fail (or validate the wrong behavior) depending on mock internals.

### Issue Context
`LiteLLMAIHandler._get_completion()` expects an object that:
- supports `response["choices"]` returning a list
- supports `response.dict()` returning a dict-like structure for logging

### Fix Focus Areas
- tests/unittest/test_litellm_api_key_guard.py[37-44]
- pr_agent/algo/ai_handlers/litellm_ai_handler.py[440-459]

### Implementation direction
Update `_mock_response()` to use a backing dict and configure magic methods in a standard way, e.g.:
- `data = {"choices": [{"message": {"content": "ok"}, "finish_reason": "stop"}]}`
- `mock = MagicMock()`
- `mock.__getitem__.side_effect = data.__getitem__`
- `mock.dict.return_value = data`
This ensures `response["choices"]` returns a real list and the handler’s logging path still works.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools



@pytest.fixture(autouse=True)
def patch_settings(monkeypatch):
monkeypatch.setattr(litellm_handler, "get_settings", lambda: _make_settings())


def _make_anthropic_settings():
"""Settings with ANTHROPIC.KEY configured, no OPENAI.KEY.

This simulates the original bug scenario: ANTHROPIC.KEY is set,
but OPENAI.KEY is not, so litellm.api_key falls back to DUMMY_LITELLM_API_KEY.
"""
anthropic_key = "test-anthropic-key-12345"
return type("Settings", (), {
"config": type("Config", (), {
"reasoning_effort": None,
"ai_timeout": 30,
"custom_reasoning_model": False,
"max_model_tokens": 32000,
"verbosity_level": 0,
"seed": -1,
"get": lambda self, key, default=None: default,
})(),
"litellm": type("LiteLLM", (), {
"get": lambda self, key, default=None: default,
})(),
"anthropic": type("Anthropic", (), {
"key": anthropic_key
})(),
# Return the Anthropic key when settings.get("ANTHROPIC.KEY") is called
"get": lambda self, key, default=None: (
anthropic_key if key == "ANTHROPIC.KEY" else default
),
})()


class TestApiKeyGuard:

@pytest.mark.asyncio
async def test_dummy_key_not_forwarded(self, monkeypatch):
"""api_key must NOT appear in kwargs when litellm.api_key is the placeholder."""
monkeypatch.setattr(litellm, "api_key", DUMMY_LITELLM_API_KEY)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")

assert "api_key" not in mock_call.call_args[1]

@pytest.mark.asyncio
async def test_none_api_key_not_forwarded(self, monkeypatch):
"""api_key must NOT appear in kwargs when litellm.api_key is None.

This is the OpenAI-key path: OPENAI.KEY sets litellm.openai_key,
leaving litellm.api_key at None.
"""
monkeypatch.setattr(litellm, "api_key", None)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")

assert "api_key" not in mock_call.call_args[1]

@pytest.mark.asyncio
async def test_real_key_forwarded(self, monkeypatch):
"""api_key IS injected when a real provider key is in litellm.api_key (e.g. Groq, XAI).

The key is set after __init__ to simulate a provider having stored its key there
during initialization, without triggering the placeholder value in __init__.
"""
real_key = "test-provider-key-67890"

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()
# Set after init so __init__'s own dummy-key assignment doesn't overwrite it
monkeypatch.setattr(litellm, "api_key", real_key)
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")

assert mock_call.call_args[1]["api_key"] == real_key

@pytest.mark.asyncio
async def test_anthropic_key_not_shadowed_by_dummy_key(self, monkeypatch):
"""Original bug scenario: ANTHROPIC.KEY configured without OPENAI.KEY.

During __init__, litellm.api_key is set to DUMMY_LITELLM_API_KEY (fallback)
because OPENAI.KEY is not configured. But litellm.anthropic_key is also set.
The guard must prevent the dummy key from being passed to the call,
allowing litellm to use anthropic_key internally.

This test replicates the exact bug from GitHub issue #2042.
"""
# Override settings to simulate Anthropic configured, OpenAI not configured
monkeypatch.setattr(litellm_handler, "get_settings", _make_anthropic_settings)

# Ensure deterministic preconditions: delete OPENAI_API_KEY env var so __init__
# will set litellm.api_key to DUMMY_LITELLM_API_KEY (line 42-43 of litellm_ai_handler.py)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)

# Reset litellm.api_key to avoid cross-test state pollution
monkeypatch.setattr(litellm, "api_key", None)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()

# After init: litellm.api_key should be the dummy (OpenAI fallback),
# but litellm.anthropic_key is the real Anthropic key
assert litellm.api_key == DUMMY_LITELLM_API_KEY

# Call with Anthropic model
await handler.chat_completion(
model="claude-3-5-sonnet-20241022",
system="sys",
user="usr"
)

# Verify the dummy key was NOT passed to the call.
# This allows litellm to use litellm.anthropic_key internally.
assert "api_key" not in mock_call.call_args[1]

@pytest.mark.asyncio
async def test_groq_key_forwarded_for_non_ollama_model(self, monkeypatch):
"""Regression check for PR #2288: Groq key must be forwarded for non-Ollama models.

PR #2288 changed the forwarding guard to only forward api_key when
model.startswith('ollama'). This test verifies whether that approach
silently drops the Groq key when calling a non-Ollama model (e.g. gpt-4o).

Groq sets litellm.api_key during __init__ (see litellm_ai_handler.py line 73)
and relies on it being passed via kwargs["api_key"] to acompletion.
"""
groq_key = "test-groq-key-12345"

groq_settings = type("Settings", (), {
"config": type("Config", (), {
"reasoning_effort": None,
"ai_timeout": 30,
"custom_reasoning_model": False,
"max_model_tokens": 32000,
"verbosity_level": 0,
"seed": -1,
"get": lambda self, key, default=None: default,
})(),
"litellm": type("LiteLLM", (), {
"get": lambda self, key, default=None: default,
})(),
"groq": type("Groq", (), {
"key": groq_key,
})(),
"get": lambda self, key, default=None: (
groq_key if key == "GROQ.KEY" else default
),
})()

monkeypatch.setattr(litellm_handler, "get_settings", lambda: groq_settings)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.setattr(litellm, "api_key", None)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()

# Confirm __init__ stored the Groq key in litellm.api_key
assert litellm.api_key == groq_key, (
f"Expected litellm.api_key to be Groq key after __init__, got: {litellm.api_key!r}"
)

# Call with a non-Ollama model
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")

# The Groq key must be forwarded — without it, Groq calls will fail auth
assert mock_call.call_args[1].get("api_key") == groq_key, (
f"Groq key was NOT forwarded to acompletion. "
f"kwargs had: {mock_call.call_args[1]}"
)

@pytest.mark.asyncio
async def test_xai_key_forwarded_for_non_ollama_model(self, monkeypatch):
"""Regression check for PR #2288: xAI key must be forwarded for non-Ollama models.

Similar to Groq, xAI sets litellm.api_key during __init__ and relies on
it being forwarded via kwargs["api_key"]. PR #2288's model-scoped approach
would also break xAI.
"""
xai_key = "xai-test-key-67890"

xai_settings = type("Settings", (), {
"config": type("Config", (), {
"reasoning_effort": None,
"ai_timeout": 30,
"custom_reasoning_model": False,
"max_model_tokens": 32000,
"verbosity_level": 0,
"seed": -1,
"get": lambda self, key, default=None: default,
})(),
"litellm": type("LiteLLM", (), {
"get": lambda self, key, default=None: default,
})(),
"xai": type("XAI", (), {
"key": xai_key,
})(),
"get": lambda self, key, default=None: (
xai_key if key == "XAI.KEY" else default
),
})()

monkeypatch.setattr(litellm_handler, "get_settings", lambda: xai_settings)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.setattr(litellm, "api_key", None)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()

assert litellm.api_key == xai_key
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")

assert mock_call.call_args[1].get("api_key") == xai_key

@pytest.mark.asyncio
async def test_ollama_and_groq_coexist(self, monkeypatch):
"""Verify both Ollama and Groq keys can coexist and be forwarded correctly.

When multiple providers are configured, litellm.api_key gets overwritten
sequentially during __init__. The sentinel guard should still forward
whatever real key is currently in litellm.api_key.
"""
groq_key = "gsk-groq-key"
ollama_key = "ollama-key"

# Simulate: Groq key set first, then Ollama overwrites litellm.api_key
mixed_settings = type("Settings", (), {
"config": type("Config", (), {
"reasoning_effort": None,
"ai_timeout": 30,
"custom_reasoning_model": False,
"max_model_tokens": 32000,
"verbosity_level": 0,
"seed": -1,
"get": lambda self, key, default=None: default,
})(),
"litellm": type("LiteLLM", (), {
"get": lambda self, key, default=None: default,
})(),
"groq": type("Groq", (), {"key": groq_key})(),
"ollama": type("Ollama", (), {
"api_key": ollama_key,
"api_base": "http://localhost:11434",
})(),
"get": lambda self, key, default=None: (
groq_key if key == "GROQ.KEY" else
ollama_key if key == "OLLAMA.API_KEY" else
"http://localhost:11434" if key == "OLLAMA.API_BASE" else
default
),
})()

monkeypatch.setattr(litellm_handler, "get_settings", lambda: mixed_settings)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.setattr(litellm, "api_key", None)

with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
new_callable=AsyncMock) as mock_call:
mock_call.return_value = _mock_response()
handler = LiteLLMAIHandler()

# After init, litellm.api_key should be Ollama (last assignment)
assert litellm.api_key == ollama_key

# Call with Ollama model — should get Ollama key
await handler.chat_completion(model="ollama/mistral", system="sys", user="usr")
assert mock_call.call_args[1]["api_key"] == ollama_key

# Call with non-Ollama model — should still forward the key
# (which is Ollama in this case, but the guard correctly allows real keys through)
await handler.chat_completion(model="gpt-4o", system="sys", user="usr")
assert mock_call.call_args[1]["api_key"] == ollama_key