Skip to content

Commit 261d680

Browse files
authored
Merge pull request #73 from Achieve3318/feat/anthropic-vlm-provider
feat: add Anthropic Claude VLM provider
2 parents 28a3a9d + 3cb5a5e commit 261d680

File tree

6 files changed

+271
-2
lines changed

6 files changed

+271
-2
lines changed

paperbanana/core/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class Settings(BaseSettings):
9393
google_api_key: Optional[str] = Field(default=None, alias="GOOGLE_API_KEY")
9494
openrouter_api_key: Optional[str] = Field(default=None, alias="OPENROUTER_API_KEY")
9595
openai_api_key: Optional[str] = Field(default=None, alias="OPENAI_API_KEY")
96+
anthropic_api_key: Optional[str] = Field(default=None, alias="ANTHROPIC_API_KEY")
9697
openai_base_url: str = Field(default="https://api.openai.com/v1", alias="OPENAI_BASE_URL")
9798
openai_vlm_model: Optional[str] = Field(default=None, alias="OPENAI_VLM_MODEL")
9899
openai_image_model: Optional[str] = Field(default=None, alias="OPENAI_IMAGE_MODEL")

paperbanana/providers/registry.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@
3333
" 2. Set the environment variable:\n\n"
3434
" export OPENAI_API_KEY=your-key-here"
3535
),
36+
"ANTHROPIC_API_KEY": (
37+
"ANTHROPIC_API_KEY not found.\n\n"
38+
"To fix this:\n"
39+
" 1. Get an API key at: https://console.anthropic.com/settings/keys\n"
40+
" 2. Set the environment variable:\n\n"
41+
" export ANTHROPIC_API_KEY=your-key-here"
42+
),
3643
"AWS_CREDENTIALS": (
3744
"AWS credentials not found for Bedrock.\n\n"
3845
"To fix this, configure one of:\n"
@@ -111,9 +118,18 @@ def create_vlm(settings: Settings) -> VLMProvider:
111118
region=settings.aws_region,
112119
profile=settings.aws_profile,
113120
)
121+
elif provider == "anthropic":
122+
_validate_api_key(settings.anthropic_api_key, "ANTHROPIC_API_KEY")
123+
from paperbanana.providers.vlm.anthropic import AnthropicVLM
124+
125+
return AnthropicVLM(
126+
api_key=settings.anthropic_api_key,
127+
model=settings.vlm_model,
128+
)
114129
else:
115130
raise ValueError(
116-
f"Unknown VLM provider: {provider}. Available: gemini, openrouter, openai, bedrock"
131+
"Unknown VLM provider: "
132+
f"{provider}. Available: gemini, openrouter, openai, bedrock, anthropic"
117133
)
118134

119135
@staticmethod
paperbanana/providers/vlm/anthropic.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""Anthropic Claude VLM provider."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Optional
6+
7+
import structlog
8+
from PIL import Image
9+
from tenacity import retry, stop_after_attempt, wait_exponential
10+
11+
from paperbanana.core.utils import image_to_base64
12+
from paperbanana.providers.base import VLMProvider
13+
14+
logger = structlog.get_logger()
15+
16+
17+
class AnthropicVLM(VLMProvider):
    """VLM provider using the Anthropic Python SDK (async).

    Works with Claude 3.x / 4.x models via the Messages API. The SDK
    client is created lazily, so importing this module does not require
    the optional ``anthropic`` dependency to be installed.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "claude-3-5-sonnet-20251023",
    ):
        """Store configuration; no SDK import or network access happens here.

        Args:
            api_key: Anthropic API key, or ``None`` if unconfigured (the
                provider then reports itself unavailable).
            model: Model identifier passed to the Messages API.
        """
        self._api_key = api_key
        self._model = model
        # Lazily-created AsyncAnthropic client (see _get_client).
        self._client = None

    @property
    def name(self) -> str:
        """Provider identifier used by the registry."""
        return "anthropic"

    @property
    def model_name(self) -> str:
        """The configured Anthropic model identifier."""
        return self._model

    def _get_client(self):
        """Lazy-init an AsyncAnthropic client.

        Raises:
            ImportError: If the optional ``anthropic`` package is not
                installed.
        """
        if self._client is None:
            try:
                from anthropic import AsyncAnthropic
            except ImportError as err:
                # Chain the original error so the real import failure stays
                # visible in tracebacks (PEP 3134).
                raise ImportError(
                    "anthropic is required for the Anthropic provider. "
                    "Install with: pip install 'paperbanana[anthropic]'"
                ) from err
            self._client = AsyncAnthropic(api_key=self._api_key)
        return self._client

    def is_available(self) -> bool:
        """Return True when an API key has been configured."""
        return self._api_key is not None

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=30))
    async def generate(
        self,
        prompt: str,
        images: Optional[list[Image.Image]] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 1.0,
        max_tokens: int = 4096,
        response_format: Optional[str] = None,
    ) -> str:
        """Generate text (optionally grounded on images) via the Messages API.

        Args:
            prompt: User prompt text, appended after any image blocks.
            images: Optional PIL images, inlined as base64 PNG blocks.
            system_prompt: Optional system prompt.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            response_format: ``"json"`` to request machine-parseable JSON
                output; any other value leaves the output free-form.

        Returns:
            The concatenation of all text blocks in the response.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
        """
        client = self._get_client()

        # Image blocks come before the text block so the model sees the
        # images before the instruction.
        content: list[dict] = []
        if images:
            for img in images:
                content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": image_to_base64(img),
                        },
                    }
                )
        content.append({"type": "text", "text": prompt})

        params: dict = {
            "model": self._model,
            "max_tokens": max_tokens,
            "messages": [{"role": "user", "content": content}],
            "temperature": temperature,
        }

        if system_prompt:
            params["system"] = system_prompt

        if response_format == "json":
            # Use structured outputs with a permissive JSON schema so callers
            # receive machine-parseable JSON while retaining flexibility.
            # NOTE(review): confirm the "output_config"/"format" field names
            # against the current Anthropic structured-outputs API.
            params["output_config"] = {
                "format": {
                    "type": "json_schema",
                    "schema": {
                        "type": "object",
                        "additionalProperties": True,
                    },
                }
            }

        response = await client.messages.create(**params)

        # Anthropic returns a list of content blocks; concatenate all text
        # blocks. ``content`` may be missing or None on stub responses, so
        # fall back to an empty list instead of iterating None.
        parts: list[str] = []
        for block in getattr(response, "content", None) or []:
            # Support both SDK objects and plain dicts in tests.
            if isinstance(block, dict):
                block_type = block.get("type")
                text_value = block.get("text")
            else:
                block_type = getattr(block, "type", None)
                text_value = getattr(block, "text", None)
            if block_type == "text" and text_value:
                parts.append(text_value)

        text = "".join(parts)

        logger.debug(
            "Anthropic response",
            model=self._model,
            usage=getattr(response, "usage", None),
        )
        return text

pyproject.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ dependencies = [
4646
google = ["google-genai>=1.65"]
4747
openai = ["openai>=1.0"]
4848
bedrock = ["boto3>=1.34"]
49-
all-providers = ["google-genai>=1.65", "openai>=1.0", "boto3>=1.34"]
49+
anthropic = ["anthropic>=0.83"]
50+
all-providers = [
51+
"google-genai>=1.65",
52+
"openai>=1.0",
53+
"boto3>=1.34",
54+
"anthropic>=0.83",
55+
]
5056
mcp = ["fastmcp>=2.0"]
5157
dev = [
5258
"pytest>=8.0",
tests/test_providers/test_anthropic.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""Tests for the Anthropic VLM provider."""
2+
3+
from __future__ import annotations
4+
5+
import types
6+
from typing import Any
7+
8+
import pytest
9+
from PIL import Image
10+
11+
from paperbanana.providers.vlm.anthropic import AnthropicVLM
12+
13+
14+
@pytest.mark.asyncio
async def test_generate_text_only(monkeypatch: pytest.MonkeyPatch) -> None:
    """AnthropicVLM.generate should send a basic text-only request and return text."""
    import sys

    recorded: dict[str, Any] = {}

    class _StubMessages:
        async def create(self, **kwargs: Any) -> Any:  # type: ignore[override]
            recorded.update(kwargs)
            text_block = types.SimpleNamespace(type="text", text="hello world")
            return types.SimpleNamespace(content=[text_block], usage=None)

    class _StubClient:
        def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401
            self.messages = _StubMessages()

    # Install a fake ``anthropic`` module so the provider's lazy import
    # resolves to the stub client instead of the real SDK.
    stub_module = types.ModuleType("anthropic")
    stub_module.AsyncAnthropic = _StubClient  # type: ignore[attr-defined]
    monkeypatch.setitem(sys.modules, "anthropic", stub_module)

    provider = AnthropicVLM(api_key="test-key", model="claude-3-5-sonnet-20251023")
    result = await provider.generate("Hi Claude")

    assert result == "hello world"
    assert recorded["model"] == provider.model_name
    assert recorded["max_tokens"] == 4096
    assert isinstance(recorded["messages"], list)
    assert recorded["messages"][0]["role"] == "user"
46+
47+
48+
@pytest.mark.asyncio
async def test_generate_with_images_and_json(monkeypatch: pytest.MonkeyPatch) -> None:
    """AnthropicVLM.generate should inline images and enable JSON mode when requested."""
    import sys

    recorded: dict[str, Any] = {}

    class _StubMessages:
        async def create(self, **kwargs: Any) -> Any:  # type: ignore[override]
            recorded.update(kwargs)
            json_block = types.SimpleNamespace(type="text", text="{}")
            return types.SimpleNamespace(content=[json_block], usage=None)

    class _StubClient:
        def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401
            self.messages = _StubMessages()

    stub_module = types.ModuleType("anthropic")
    stub_module.AsyncAnthropic = _StubClient  # type: ignore[attr-defined]
    monkeypatch.setitem(sys.modules, "anthropic", stub_module)

    # Avoid depending on real base64 implementation details.
    monkeypatch.setattr(
        "paperbanana.providers.vlm.anthropic.image_to_base64",
        lambda _img: "base64-image-data",
    )

    provider = AnthropicVLM(api_key="test-key", model="claude-3-5-sonnet-20251023")
    tiny_image = Image.new("RGB", (4, 4))

    await provider.generate("Hi with image", images=[tiny_image], response_format="json")

    assert recorded["model"] == provider.model_name

    message = recorded["messages"][0]
    assert message["role"] == "user"

    content = message["content"]
    assert content[0]["type"] == "image"
    assert content[0]["source"]["data"] == "base64-image-data"
    assert content[-1]["type"] == "text"
    assert content[-1]["text"] == "Hi with image"

    fmt = recorded["output_config"]["format"]
    assert fmt["type"] == "json_schema"
    assert isinstance(fmt["schema"], dict)

tests/test_providers/test_registry.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ def test_missing_openrouter_api_key_raises_helpful_error():
5151
assert "export OPENROUTER_API_KEY" in error_msg
5252

5353

54+
def test_missing_anthropic_api_key_raises_helpful_error():
    """Test that missing ANTHROPIC_API_KEY raises a helpful error with setup instructions."""
    settings = Settings(vlm_provider="anthropic", anthropic_api_key=None)

    with pytest.raises(ValueError, match="ANTHROPIC_API_KEY not found") as exc_info:
        ProviderRegistry.create_vlm(settings)

    message = str(exc_info.value)
    assert "console.anthropic.com" in message
    assert "export ANTHROPIC_API_KEY" in message
62+
63+
5464
def test_missing_google_api_key_for_image_gen_raises_helpful_error():
5565
"""Test that missing GOOGLE_API_KEY for image gen raises a helpful error."""
5666
settings = Settings(image_provider="google_imagen", google_api_key=None)

0 commit comments

Comments
 (0)