@@ -229,127 +229,6 @@ async def call_api():
229229 assert len (elements ) > 0
230230
231231
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_openai(
    split_pdf: bool,
    vlm_model: str,
    vlm_model_provider: str,
    client,
    doc_path: Path,
    filename: str,
):
    """Partition a sample document with strategy "vlm" using the OpenAI provider.

    Runs once per combination of sample file and ``split_pdf`` flag and
    checks that the call succeeds, returns at least one element, and that
    the first element reports ``vlm_partition`` as its partitioner type.
    """
    # Read the whole document up front so the file handle is closed before
    # the request is sent.
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            strategy="vlm",
            vlm_model=vlm_model,
            vlm_model_provider=vlm_model_provider,
            languages=["eng"],
            split_pdf_page=split_pdf,
        )
    )

    response = client.general.partition(request=req)

    assert response.status_code == 200
    assert len(response.elements) > 0
    # Only the first element is inspected for the partitioner marker.
    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
267-
268-
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize(
    "vlm_model",
    [
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "us.anthropic.claude-3-opus-20240229-v1:0",
        "us.anthropic.claude-3-haiku-20240307-v1:0",
        "us.anthropic.claude-3-sonnet-20240229-v1:0",
        "us.meta.llama3-2-90b-instruct-v1:0",
        "us.meta.llama3-2-11b-instruct-v1:0",
    ],
)
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_bedrock(
    split_pdf: bool,
    vlm_model: str,
    vlm_model_provider: str,
    client,
    doc_path: Path,
    filename: str,
):
    """Partition a sample document with strategy "vlm" using the Bedrock provider.

    Runs once per combination of listed model id, sample file and
    ``split_pdf`` flag, and checks that the call succeeds, returns at least
    one element, and that the first element reports ``vlm_partition`` as
    its partitioner type.
    """
    # Read the whole document up front so the file handle is closed before
    # the request is sent.
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            strategy="vlm",
            vlm_model=vlm_model,
            vlm_model_provider=vlm_model_provider,
            languages=["eng"],
            split_pdf_page=split_pdf,
        )
    )

    response = client.general.partition(request=req)

    assert response.status_code == 200
    assert len(response.elements) > 0
    # Only the first element is inspected for the partitioner marker.
    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
315-
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_anthropic(
    split_pdf: bool,
    vlm_model: str,
    vlm_model_provider: str,
    client,
    doc_path: Path,
    filename: str,
):
    """Partition a sample document with strategy "vlm" using the Anthropic provider.

    Runs once per combination of sample file and ``split_pdf`` flag and
    checks that the call succeeds, returns at least one element, and that
    the first element reports ``vlm_partition`` as its partitioner type.
    """
    # Read the whole document up front so the file handle is closed before
    # the request is sent.
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            strategy="vlm",
            vlm_model=vlm_model,
            vlm_model_provider=vlm_model_provider,
            languages=["eng"],
            split_pdf_page=split_pdf,
        )
    )

    response = client.general.partition(request=req)

    assert response.status_code == 200
    assert len(response.elements) > 0
    # Only the first element is inspected for the partitioner marker.
    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
351-
352-
353232def test_returns_422_for_invalid_pdf (
354233 caplog : pytest .LogCaptureFixture ,
355234 doc_path : Path ,
0 commit comments