1414
1515
# FREEMIUM_URL = "https://api.unstructured.io"
# NOTE(review): the hosted URL above is commented out and the tests below are
# pointed at a local server instead -- confirm this is intended before merging.
# The value is passed unchanged as `server_url`, so it must not carry any
# leading or trailing whitespace.
FREEMIUM_URL = "http://127.0.0.1:8009"
1818
1919
2020@pytest .fixture (scope = "module" )
@@ -236,12 +236,13 @@ async def call_api():
236236 assert len (elements ) > 0
237237
238238
239- # TODO (klaijan) - still cannot test against vlm if the updated client vlm is generated
239+ # TODO (klaijan) - vlm still cannot be tested unless the updated client (with the vlm parameters) has been generated:
240240# `vlm_model cannot be empty for vlm requests.` because partition_parameters do not have the param.
241+ # TODO (klaijan) - test with more models and providers
241242@pytest .mark .parametrize ("split_pdf" , [True , False ])
242243@pytest .mark .parametrize ("vlm_model" , ["gpt-4o" ])
243244@pytest .mark .parametrize ("vlm_model_provider" , ["openai" ])
244- def test_partition_strategy_vlm (split_pdf , vlm_model , vlm_model_provider , client , doc_path ):
245+ def test_partition_strategy_vlm_openai (split_pdf , vlm_model , vlm_model_provider , client , doc_path ):
245246 filename = "layout-parser-paper-fast.pdf"
246247 with open (doc_path / filename , "rb" ) as f :
247248 files = shared .Files (
@@ -255,8 +256,49 @@ def test_partition_strategy_vlm(split_pdf, vlm_model, vlm_model_provider, client
255256 strategy = "vlm" ,
256257 vlm_model = vlm_model ,
257258 vlm_model_provider = vlm_model_provider ,
258- # languages=["eng"],
259- # split_pdf_page=split_pdf,
259+ languages = ["eng" ],
260+ split_pdf_page = split_pdf ,
261+ )
262+ )
263+
264+ response = client .general .partition (
265+ server_url = FREEMIUM_URL ,
266+ request = req
267+ )
268+ assert response .status_code == 200
269+ assert len (response .elements )
270+
271+
272+ @pytest .mark .parametrize ("split_pdf" , [True , False ])
273+ @pytest .mark .parametrize ("vlm_model" ,
274+ [
275+ "us.amazon.nova-pro-v1:0" ,
276+ "us.amazon.nova-lite-v1:0" ,
277+ "us.anthropic.claude-3-5-sonnet-20241022-v2:0" ,
278+ "us.anthropic.claude-3-opus-20240229-v1:0" ,
279+ "us.anthropic.claude-3-haiku-20240307-v1:0" ,
280+ "us.anthropic.claude-3-sonnet-20240229-v1:0" ,
281+ "us.meta.llama3-2-90b-instruct-v1:0" ,
282+ "us.meta.llama3-2-11b-instruct-v1:0" ,
283+ ]
284+ )
285+ @pytest .mark .parametrize ("vlm_model_provider" , ["bedrock" ])
286+ def test_partition_strategy_vlm_bedrock (split_pdf , vlm_model , vlm_model_provider , client , doc_path ):
287+ filename = "layout-parser-paper-fast.pdf"
288+ with open (doc_path / filename , "rb" ) as f :
289+ files = shared .Files (
290+ content = f .read (),
291+ file_name = filename ,
292+ )
293+
294+ req = operations .PartitionRequest (
295+ partition_parameters = shared .PartitionParameters (
296+ files = files ,
297+ strategy = "vlm" ,
298+ vlm_model = vlm_model ,
299+ vlm_model_provider = vlm_model_provider ,
300+ languages = ["eng" ],
301+ split_pdf_page = split_pdf ,
260302 )
261303 )
262304
@@ -265,4 +307,38 @@ def test_partition_strategy_vlm(split_pdf, vlm_model, vlm_model_provider, client
265307 request = req
266308 )
267309 assert response .status_code == 200
268- assert len (response .elements )
310+ assert len (response .elements )
311+
312+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path):
    """Partition the sample PDF with the ``vlm`` strategy via the Anthropic provider.

    Runs once per ``split_pdf`` value. The request is sent to ``FREEMIUM_URL``
    and must come back with HTTP 200 and a non-empty list of elements.
    """
    pdf_name = "layout-parser-paper-fast.pdf"
    with open(doc_path / pdf_name, "rb") as pdf:
        payload = pdf.read()

    parameters = shared.PartitionParameters(
        files=shared.Files(content=payload, file_name=pdf_name),
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    result = client.general.partition(
        server_url=FREEMIUM_URL,
        request=operations.PartitionRequest(partition_parameters=parameters),
    )

    assert result.status_code == 200
    assert len(result.elements)
0 commit comments