File tree (expand / collapse): 3 files changed, +25 -6 lines changed
paperbanana/providers/vlm
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ async def generate(
7272 b64 = image_to_base64 (img )
7373 content .append (
7474 {
75- "type" : "input_image " ,
75+ "type" : "image " ,
7676 "source" : {
7777 "type" : "base64" ,
7878 "media_type" : "image/png" ,
@@ -101,8 +101,17 @@ async def generate(
101101 params ["system" ] = system_prompt
102102
103103 if response_format == "json" :
104- # Constrain the model to emit a JSON object if supported by the SDK.
105- params ["response_format" ] = {"type" : "json_object" }
104+ # Use structured outputs with a permissive JSON schema so callers
105+ # receive machine-parseable JSON while retaining flexibility.
106+ params ["output_config" ] = {
107+ "format" : {
108+ "type" : "json_schema" ,
109+ "schema" : {
110+ "type" : "object" ,
111+ "additionalProperties" : True ,
112+ },
113+ }
114+ }
106115
107116 response = await client .messages .create (** params )
108117
Original file line number Diff line number Diff line change @@ -46,7 +46,13 @@ dependencies = [
4646google = [" google-genai>=1.65" ]
4747openai = [" openai>=1.0" ]
4848bedrock = [" boto3>=1.34" ]
49- all-providers = [" google-genai>=1.65" , " openai>=1.0" , " boto3>=1.34" ]
49+ anthropic = [" anthropic>=0.83" ]
50+ all-providers = [
51+ " google-genai>=1.65" ,
52+ " openai>=1.0" ,
53+ " boto3>=1.34" ,
54+ " anthropic>=0.83" ,
55+ ]
5056mcp = [" fastmcp>=2.0" ]
5157dev = [
5258 " pytest>=8.0" ,
Original file line number Diff line number Diff line change @@ -86,8 +86,12 @@ def _fake_image_to_base64(_img: Image.Image) -> str:
8686 msg = captured ["messages" ][0 ]
8787 assert msg ["role" ] == "user"
8888 content = msg ["content" ]
89- assert content [0 ]["type" ] == "input_image "
89+ assert content [0 ]["type" ] == "image "
9090 assert content [0 ]["source" ]["data" ] == "base64-image-data"
9191 assert content [- 1 ]["type" ] == "text"
9292 assert content [- 1 ]["text" ] == "Hi with image"
93- assert captured ["response_format" ] == {"type" : "json_object" }
93+
94+ output_config = captured ["output_config" ]
95+ fmt = output_config ["format" ]
96+ assert fmt ["type" ] == "json_schema"
97+ assert isinstance (fmt ["schema" ], dict )
You can’t perform that action at this time.
0 commit comments