@@ -24,30 +24,30 @@ def doc_path() -> Path:
2424 return Path (__file__ ).resolve ().parents [2 ] / "_sample_docs"
2525
2626
27- @pytest .mark .parametrize ("split_pdf" , [True , False ])
28- @pytest .mark .parametrize ("strategy" , ["fast" , "ocr_only" , "hi_res" ])
29- def test_partition_strategies (split_pdf , strategy , client , doc_path ):
30- filename = "layout-parser-paper-fast.pdf"
31- with open (doc_path / filename , "rb" ) as f :
32- files = shared .Files (
33- content = f .read (),
34- file_name = filename ,
35- )
36-
37- req = operations .PartitionRequest (
38- partition_parameters = shared .PartitionParameters (
39- files = files ,
40- strategy = strategy ,
41- languages = ["eng" ],
42- split_pdf_page = split_pdf ,
43- )
44- )
45-
46- response = client .general .partition (
47- request = req
48- )
49- assert response .status_code == 200
50- assert len (response .elements )
27+ # @pytest.mark.parametrize("split_pdf", [True, False])
28+ # @pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
29+ # def test_partition_strategies(split_pdf, strategy, client, doc_path):
30+ # filename = "layout-parser-paper-fast.pdf"
31+ # with open(doc_path / filename, "rb") as f:
32+ # files = shared.Files(
33+ # content=f.read(),
34+ # file_name=filename,
35+ # )
36+
37+ # req = operations.PartitionRequest(
38+ # partition_parameters=shared.PartitionParameters(
39+ # files=files,
40+ # strategy=strategy,
41+ # languages=["eng"],
42+ # split_pdf_page=split_pdf,
43+ # )
44+ # )
45+
46+ # response = client.general.partition(
47+ # request=req
48+ # )
49+ # assert response.status_code == 200
50+ # assert len(response.elements)
5151
5252
5353@pytest .mark .parametrize ("split_pdf" , [True , False ])
@@ -220,125 +220,125 @@ async def call_api():
220220 assert len (elements ) > 0
221221
222222
223- @pytest .mark .parametrize ("split_pdf" , [True , False ])
224- @pytest .mark .parametrize ("vlm_model" , ["gpt-4o" ])
225- @pytest .mark .parametrize ("vlm_model_provider" , ["openai" ])
226- @pytest .mark .parametrize (
227- "filename" ,
228- [
229- "layout-parser-paper-fast.pdf" ,
230- "fake-power-point.ppt" ,
231- "embedded-images-tables.jpg" ,
232- ]
233- )
234- def test_partition_strategy_vlm_openai (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
235- with open (doc_path / filename , "rb" ) as f :
236- files = shared .Files (
237- content = f .read (),
238- file_name = filename ,
239- )
240-
241- req = operations .PartitionRequest (
242- partition_parameters = shared .PartitionParameters (
243- files = files ,
244- strategy = "vlm" ,
245- vlm_model = vlm_model ,
246- vlm_model_provider = vlm_model_provider ,
247- languages = ["eng" ],
248- split_pdf_page = split_pdf ,
249- )
250- )
251-
252- response = client .general .partition (
253- request = req
254- )
255- assert response .status_code == 200
256- assert len (response .elements ) > 0
257- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
258-
259-
260- @pytest .mark .parametrize ("split_pdf" , [True , False ])
261- @pytest .mark .parametrize ("vlm_model" ,
262- [
263- "us.amazon.nova-pro-v1:0" ,
264- "us.amazon.nova-lite-v1:0" ,
265- "us.anthropic.claude-3-5-sonnet-20241022-v2:0" ,
266- "us.anthropic.claude-3-opus-20240229-v1:0" ,
267- "us.anthropic.claude-3-haiku-20240307-v1:0" ,
268- "us.anthropic.claude-3-sonnet-20240229-v1:0" ,
269- "us.meta.llama3-2-90b-instruct-v1:0" ,
270- "us.meta.llama3-2-11b-instruct-v1:0" ,
271- ]
272- )
273- @pytest .mark .parametrize ("vlm_model_provider" , ["bedrock" ])
274- @pytest .mark .parametrize (
275- "filename" ,
276- [
277- "layout-parser-paper-fast.pdf" ,
278- "fake-power-point.ppt" ,
279- "embedded-images-tables.jpg" ,
280- ]
281- )
282- def test_partition_strategy_vlm_bedrock (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
283- with open (doc_path / filename , "rb" ) as f :
284- files = shared .Files (
285- content = f .read (),
286- file_name = filename ,
287- )
288-
289- req = operations .PartitionRequest (
290- partition_parameters = shared .PartitionParameters (
291- files = files ,
292- strategy = "vlm" ,
293- vlm_model = vlm_model ,
294- vlm_model_provider = vlm_model_provider ,
295- languages = ["eng" ],
296- split_pdf_page = split_pdf ,
297- )
298- )
299-
300- response = client .general .partition (
301- request = req
302- )
303- assert response .status_code == 200
304- assert len (response .elements ) > 0
305- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
306-
307- @pytest .mark .parametrize ("split_pdf" , [True , False ])
308- @pytest .mark .parametrize ("vlm_model" , ["claude-3-5-sonnet-20241022" ,])
309- @pytest .mark .parametrize ("vlm_model_provider" , ["anthropic" ])
310- @pytest .mark .parametrize (
311- "filename" ,
312- [
313- "layout-parser-paper-fast.pdf" ,
314- "fake-power-point.ppt" ,
315- "embedded-images-tables.jpg" ,
316- ]
317- )
318- def test_partition_strategy_vlm_anthropic (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
319- with open (doc_path / filename , "rb" ) as f :
320- files = shared .Files (
321- content = f .read (),
322- file_name = filename ,
323- )
324-
325- req = operations .PartitionRequest (
326- partition_parameters = shared .PartitionParameters (
327- files = files ,
328- strategy = "vlm" ,
329- vlm_model = vlm_model ,
330- vlm_model_provider = vlm_model_provider ,
331- languages = ["eng" ],
332- split_pdf_page = split_pdf ,
333- )
334- )
335-
336- response = client .general .partition (
337- request = req
338- )
339- assert response .status_code == 200
340- assert len (response .elements ) > 0
341- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
223+ # @pytest.mark.parametrize("split_pdf", [True, False])
224+ # @pytest.mark.parametrize("vlm_model", ["gpt-4o"])
225+ # @pytest.mark.parametrize("vlm_model_provider", ["openai"])
226+ # @pytest.mark.parametrize(
227+ # "filename",
228+ # [
229+ # "layout-parser-paper-fast.pdf",
230+ # "fake-power-point.ppt",
231+ # "embedded-images-tables.jpg",
232+ # ]
233+ # )
234+ # def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
235+ # with open(doc_path / filename, "rb") as f:
236+ # files = shared.Files(
237+ # content=f.read(),
238+ # file_name=filename,
239+ # )
240+
241+ # req = operations.PartitionRequest(
242+ # partition_parameters=shared.PartitionParameters(
243+ # files=files,
244+ # strategy="vlm",
245+ # vlm_model=vlm_model,
246+ # vlm_model_provider=vlm_model_provider,
247+ # languages=["eng"],
248+ # split_pdf_page=split_pdf,
249+ # )
250+ # )
251+
252+ # response = client.general.partition(
253+ # request=req
254+ # )
255+ # assert response.status_code == 200
256+ # assert len(response.elements) > 0
257+ # assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
258+
259+
260+ # @pytest.mark.parametrize("split_pdf", [True, False])
261+ # @pytest.mark.parametrize("vlm_model",
262+ # [
263+ # "us.amazon.nova-pro-v1:0",
264+ # "us.amazon.nova-lite-v1:0",
265+ # "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
266+ # "us.anthropic.claude-3-opus-20240229-v1:0",
267+ # "us.anthropic.claude-3-haiku-20240307-v1:0",
268+ # "us.anthropic.claude-3-sonnet-20240229-v1:0",
269+ # "us.meta.llama3-2-90b-instruct-v1:0",
270+ # "us.meta.llama3-2-11b-instruct-v1:0",
271+ # ]
272+ # )
273+ # @pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
274+ # @pytest.mark.parametrize(
275+ # "filename",
276+ # [
277+ # "layout-parser-paper-fast.pdf",
278+ # "fake-power-point.ppt",
279+ # "embedded-images-tables.jpg",
280+ # ]
281+ # )
282+ # def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
283+ # with open(doc_path / filename, "rb") as f:
284+ # files = shared.Files(
285+ # content=f.read(),
286+ # file_name=filename,
287+ # )
288+
289+ # req = operations.PartitionRequest(
290+ # partition_parameters=shared.PartitionParameters(
291+ # files=files,
292+ # strategy="vlm",
293+ # vlm_model=vlm_model,
294+ # vlm_model_provider=vlm_model_provider,
295+ # languages=["eng"],
296+ # split_pdf_page=split_pdf,
297+ # )
298+ # )
299+
300+ # response = client.general.partition(
301+ # request=req
302+ # )
303+ # assert response.status_code == 200
304+ # assert len(response.elements) > 0
305+ # assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
306+
307+ # @pytest.mark.parametrize("split_pdf", [True, False])
308+ # @pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
309+ # @pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
310+ # @pytest.mark.parametrize(
311+ # "filename",
312+ # [
313+ # "layout-parser-paper-fast.pdf",
314+ # "fake-power-point.ppt",
315+ # "embedded-images-tables.jpg",
316+ # ]
317+ # )
318+ # def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
319+ # with open(doc_path / filename, "rb") as f:
320+ # files = shared.Files(
321+ # content=f.read(),
322+ # file_name=filename,
323+ # )
324+
325+ # req = operations.PartitionRequest(
326+ # partition_parameters=shared.PartitionParameters(
327+ # files=files,
328+ # strategy="vlm",
329+ # vlm_model=vlm_model,
330+ # vlm_model_provider=vlm_model_provider,
331+ # languages=["eng"],
332+ # split_pdf_page=split_pdf,
333+ # )
334+ # )
335+
336+ # response = client.general.partition(
337+ # request=req
338+ # )
339+ # assert response.status_code == 200
340+ # assert len(response.elements) > 0
341+ # assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
342342
343343
344344def test_returns_422_for_invalid_pdf (
0 commit comments