@@ -221,127 +221,6 @@ async def call_api():
221221 assert len (elements ) > 0
222222
223223
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the "vlm" strategy using the OpenAI provider.

    Runs once per combination of ``split_pdf``, ``vlm_model``,
    ``vlm_model_provider`` and ``filename``. The request must come back with
    status 200, at least one element, and a first element whose metadata
    reports the ``vlm_partition`` partitioner type.
    """
    # Load the whole document up front; the file handle is not needed afterwards.
    with open(doc_path / filename, "rb") as stream:
        payload = stream.read()

    params = shared.PartitionParameters(
        files=shared.Files(
            content=payload,
            file_name=filename,
        ),
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=params)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    # The first element's metadata identifies which partitioner handled the request.
    first_metadata = result.elements[0]["metadata"]
    assert first_metadata["partitioner_type"] == "vlm_partition"
259-
260-
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize(
    "vlm_model",
    [
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "us.anthropic.claude-3-opus-20240229-v1:0",
        "us.anthropic.claude-3-haiku-20240307-v1:0",
        "us.anthropic.claude-3-sonnet-20240229-v1:0",
        "us.meta.llama3-2-90b-instruct-v1:0",
        "us.meta.llama3-2-11b-instruct-v1:0",
    ],
)
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the "vlm" strategy using the Bedrock provider.

    Runs once per combination of ``split_pdf``, each listed Bedrock model
    identifier, and ``filename``. The request must come back with status 200,
    at least one element, and a first element whose metadata reports the
    ``vlm_partition`` partitioner type.
    """
    # Load the whole document up front; the file handle is not needed afterwards.
    with open(doc_path / filename, "rb") as stream:
        payload = stream.read()

    params = shared.PartitionParameters(
        files=shared.Files(
            content=payload,
            file_name=filename,
        ),
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=params)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    # The first element's metadata identifies which partitioner handled the request.
    first_metadata = result.elements[0]["metadata"]
    assert first_metadata["partitioner_type"] == "vlm_partition"
307-
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
@pytest.mark.parametrize(
    "filename",
    [
        "layout-parser-paper-fast.pdf",
        "fake-power-point.ppt",
        "embedded-images-tables.jpg",
    ],
)
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the "vlm" strategy using the Anthropic provider.

    Runs once per combination of ``split_pdf``, ``vlm_model``,
    ``vlm_model_provider`` and ``filename``. The request must come back with
    status 200, at least one element, and a first element whose metadata
    reports the ``vlm_partition`` partitioner type.
    """
    # Load the whole document up front; the file handle is not needed afterwards.
    with open(doc_path / filename, "rb") as stream:
        payload = stream.read()

    params = shared.PartitionParameters(
        files=shared.Files(
            content=payload,
            file_name=filename,
        ),
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=params)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    # The first element's metadata identifies which partitioner handled the request.
    first_metadata = result.elements[0]["metadata"]
    assert first_metadata["partitioner_type"] == "vlm_partition"
343-
344-
345224def test_returns_422_for_invalid_pdf (
346225 caplog : pytest .LogCaptureFixture ,
347226 doc_path : Path ,
0 commit comments