@@ -361,7 +361,11 @@ def pipeline_api(
361361
362362 hi_res_model_name = _validate_hi_res_model_name (hi_res_model_name , coordinates )
363363 strategy = _validate_strategy (strategy )
364- pdf_infer_table_structure = _set_pdf_infer_table_structure (pdf_infer_table_structure , strategy )
364+ pdf_infer_table_structure = _set_pdf_infer_table_structure (
365+ pdf_infer_table_structure ,
366+ strategy ,
367+ skip_infer_table_types ,
368+ )
365369
366370 # Parallel mode is set by env variable
367371 enable_parallel_mode = os .environ .get ("UNSTRUCTURED_PARALLEL_MODE_ENABLED" , "false" )
@@ -441,9 +445,9 @@ def pipeline_api(
441445 )
442446 elif hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES :
443447 with ChipperMemoryProtection ():
444- elements = partition (** partition_kwargs ) # pyright: ignore[reportGeneralTypeIssues]
448+ elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
445449 else :
446- elements = partition (** partition_kwargs ) # pyright: ignore[reportGeneralTypeIssues]
450+ elements = partition (** partition_kwargs ) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
447451
448452 except OSError as e :
449453 if isinstance (e .args [0 ], str ) and (
@@ -595,8 +599,13 @@ def _validate_chunking_strategy(chunking_strategy: Optional[str]) -> Optional[st
595599 return chunking_strategy
596600
597601
598- def _set_pdf_infer_table_structure (pdf_infer_table_structure : bool , strategy : str ) -> bool :
602+ def _set_pdf_infer_table_structure (
603+ pdf_infer_table_structure : bool , strategy : str , skip_infer_table_types : Optional [List [str ]]
604+ ) -> bool :
599605 """Avoids table inference in "fast" and "ocr_only" runs."""
606+ # NOTE(robinson) - line below is for type checking
607+ skip_infer_table_types = [] if skip_infer_table_types is None else skip_infer_table_types
608+ pdf_infer_table_structure = pdf_infer_table_structure and ("pdf" not in skip_infer_table_types )
600609 return strategy in ("hi_res" , "auto" ) and pdf_infer_table_structure
601610
602611
@@ -704,7 +713,7 @@ def return_content_type(filename: str):
704713
705714
706715@router .get ("/general/v0/general" , include_in_schema = False )
707- @router .get ("/general/v0.0.68 /general" , include_in_schema = False )
716+ @router .get ("/general/v0.0.69 /general" , include_in_schema = False )
708717async def handle_invalid_get_request ():
709718 raise HTTPException (
710719 status_code = status .HTTP_405_METHOD_NOT_ALLOWED , detail = "Only POST requests are supported."
@@ -719,7 +728,7 @@ async def handle_invalid_get_request():
719728 description = "Description" ,
720729 operation_id = "partition_parameters" ,
721730)
722- @router .post ("/general/v0.0.68 /general" , include_in_schema = False )
731+ @router .post ("/general/v0.0.69 /general" , include_in_schema = False )
723732def general_partition (
724733 request : Request ,
725734 # cannot use annotated type here because of a bug described here:
0 commit comments