From 02f5ec8793dc29eb452decf65b9007c3970dfaf1 Mon Sep 17 00:00:00 2001 From: Bvicii Date: Fri, 13 Mar 2026 01:19:59 -0400 Subject: [PATCH 1/4] feat: add type hints to paddleocr/ public API (Phase 1) Add comprehensive type annotations across the entire paddleocr/ package for IDE autocompletion and static type checking support. - Add PEP 561 py.typed marker and shared _types.py (ImageInput, InputType, PredictResult) - Annotate all model base classes, mixins, and 13 model subclasses - Annotate all pipeline base, utils, and 10 pipeline subclasses - Annotate utility modules (_utils/cli, deprecation, logging) and core modules - Add mypy configuration in pyproject.toml (Python 3.8 compat via `from __future__ import annotations`) - Add mypy type check step to CI codestyle workflow --- .github/workflows/codestyle.yml | 5 + paddleocr/__main__.py | 2 +- paddleocr/_abstract.py | 8 +- paddleocr/_common_args.py | 13 +- paddleocr/_constants.py | 18 +- paddleocr/_env.py | 4 +- paddleocr/_models/_doc_vlm.py | 18 +- paddleocr/_models/_image_classification.py | 18 +- paddleocr/_models/_object_detection.py | 26 +- paddleocr/_models/_text_detection.py | 25 +- paddleocr/_models/base.py | 35 +- paddleocr/_models/chart_parsing.py | 16 +- .../doc_img_orientation_classification.py | 12 +- paddleocr/_models/doc_vlm.py | 16 +- paddleocr/_models/formula_recognition.py | 26 +- paddleocr/_models/layout_detection.py | 12 +- paddleocr/_models/seal_text_detection.py | 16 +- paddleocr/_models/table_cells_detection.py | 12 +- paddleocr/_models/table_classification.py | 12 +- .../_models/table_structure_recognition.py | 26 +- paddleocr/_models/text_detection.py | 16 +- paddleocr/_models/text_image_unwarping.py | 26 +- paddleocr/_models/text_recognition.py | 24 +- .../textline_orientation_classification.py | 12 +- paddleocr/_pipelines/base.py | 36 +- paddleocr/_pipelines/doc_preprocessor.py | 51 +-- paddleocr/_pipelines/doc_understanding.py | 37 +- paddleocr/_pipelines/formula_recognition.py | 105 +++--- paddleocr/_pipelines/ocr.py | 128 ++++--- paddleocr/_pipelines/paddleocr_vl.py | 189 +++++----- paddleocr/_pipelines/pp_chatocrv4_doc.py | 271 +++++++------- paddleocr/_pipelines/pp_doctranslation.py | 345 +++++++++--------- paddleocr/_pipelines/pp_structurev3.py | 301 +++++++-------- paddleocr/_pipelines/seal_recognition.py | 139 +++---- paddleocr/_pipelines/table_recognition_v2.py | 169 +++++---- paddleocr/_pipelines/utils.py | 6 +- paddleocr/_types.py | 30 ++ paddleocr/_utils/cli.py | 22 +- paddleocr/_utils/deprecation.py | 13 +- paddleocr/_utils/logging.py | 8 +- paddleocr/py.typed | 0 pyproject.toml | 11 + 42 files changed, 1251 insertions(+), 1008 deletions(-) create mode 100644 paddleocr/_types.py create mode 100644 paddleocr/py.typed diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 1116e59992f..4bba9078857 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -34,3 +34,8 @@ jobs: - uses: pre-commit/action@v3.0.1 with: extra_args: '--all-files' + + - name: Type check with mypy + run: | + pip install mypy numpy + mypy paddleocr/ diff --git a/paddleocr/__main__.py b/paddleocr/__main__.py index abc3c1b4639..faddf2b32d3 100644 --- a/paddleocr/__main__.py +++ b/paddleocr/__main__.py @@ -18,7 +18,7 @@ from ._cli import main -def console_entry() -> int: +def console_entry() -> None: # See https://docs.python.org/3/library/signal.html#note-on-sigpipe try: # Flush output here to force SIGPIPE to be triggered while inside this diff --git a/paddleocr/_abstract.py b/paddleocr/_abstract.py index 773e4fbb725..beb2b7354a1 100644 --- a/paddleocr/_abstract.py +++ b/paddleocr/_abstract.py @@ -12,14 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any class CLISubcommandExecutor(metaclass=abc.ABCMeta): @abc.abstractmethod - def add_subparser(self, subparsers): + def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: raise NotImplementedError @abc.abstractmethod - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: raise NotImplementedError diff --git a/paddleocr/_common_args.py b/paddleocr/_common_args.py index 6054e5ae6ad..7db35eec4c3 100644 --- a/paddleocr/_common_args.py +++ b/paddleocr/_common_args.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + from paddlex.inference import PaddlePredictorOption from paddlex.utils.device import get_default_device, parse_device @@ -28,7 +33,7 @@ from ._utils.cli import str2bool -def parse_common_args(kwargs, *, default_enable_hpi): +def parse_common_args(kwargs: dict[str, Any], *, default_enable_hpi: bool | None) -> dict[str, Any]: default_vals = { "device": DEFAULT_DEVICE, "enable_hpi": default_enable_hpi, @@ -57,13 +62,13 @@ def parse_common_args(kwargs, *, default_enable_hpi): return kwargs -def prepare_common_init_args(model_name, common_args): +def prepare_common_init_args(model_name: str | None, common_args: dict[str, Any]) -> dict[str, Any]: device = common_args["device"] if device is None: device = get_default_device() device_type, _ = parse_device(device) - init_kwargs = {} + init_kwargs: dict[str, Any] = {} init_kwargs["device"] = device init_kwargs["use_hpip"] = common_args["enable_hpi"] @@ -94,7 +99,7 @@ def prepare_common_init_args(model_name, common_args): return init_kwargs -def add_common_cli_opts(parser, *, default_enable_hpi, allow_multiple_devices): +def add_common_cli_opts(parser: argparse.ArgumentParser, *, default_enable_hpi: bool | None, allow_multiple_devices: bool) -> None: if allow_multiple_devices: help_ = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used." else: diff --git a/paddleocr/_constants.py b/paddleocr/_constants.py index d96ba780ffa..458d857abc1 100644 --- a/paddleocr/_constants.py +++ b/paddleocr/_constants.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -DEFAULT_DEVICE = None -DEFAULT_USE_TENSORRT = False -DEFAULT_PRECISION = "fp32" -DEFAULT_ENABLE_MKLDNN = True -DEFAULT_MKLDNN_CACHE_CAPACITY = 10 -DEFAULT_CPU_THREADS = 10 -SUPPORTED_PRECISION_LIST = ["fp32", "fp16"] -DEFAULT_USE_CINN = False +from __future__ import annotations + +DEFAULT_DEVICE: str | None = None +DEFAULT_USE_TENSORRT: bool = False +DEFAULT_PRECISION: str = "fp32" +DEFAULT_ENABLE_MKLDNN: bool = True +DEFAULT_MKLDNN_CACHE_CAPACITY: int = 10 +DEFAULT_CPU_THREADS: int = 10 +SUPPORTED_PRECISION_LIST: list[str] = ["fp32", "fp16"] +DEFAULT_USE_CINN: bool = False diff --git a/paddleocr/_env.py b/paddleocr/_env.py index 9f90e177168..7e25205c64b 100644 --- a/paddleocr/_env.py +++ b/paddleocr/_env.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os -DISABLE_AUTO_LOGGING_CONFIG = ( +DISABLE_AUTO_LOGGING_CONFIG: bool = ( os.getenv("PADDLEOCR_DISABLE_AUTO_LOGGING_CONFIG", "0") == "1" ) diff --git a/paddleocr/_models/_doc_vlm.py b/paddleocr/_models/_doc_vlm.py index 18134fd0f4b..6cd3aa45a34 100644 --- a/paddleocr/_models/_doc_vlm.py +++ b/paddleocr/_models/_doc_vlm.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any from .._utils.cli import ( get_subcommand_args, @@ -25,13 +29,13 @@ class BaseDocVLM(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: dict[str, Any] = {} super().__init__(*args, **kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args @@ -40,10 +44,10 @@ class BaseDocVLMSubcommandExecutor(PredictorCLISubcommandExecutor): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) params["input"] = self.input_validator(params["input"]) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_image_classification.py b/paddleocr/_models/_image_classification.py index 2bad088deb0..bb8af8058be 100644 --- a/paddleocr/_models/_image_classification.py +++ b/paddleocr/_models/_image_classification.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any from .._utils.cli import ( add_simple_inference_args, @@ -26,20 +30,20 @@ class ImageClassification(PaddleXPredictorWrapper): def __init__( self, *, - topk=None, - **kwargs, - ): + topk: int | None = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "topk": topk, } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class ImageClassificationSubcommandExecutor(PredictorCLISubcommandExecutor): - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -50,9 +54,9 @@ def _update_subparser(self, subparser): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_object_detection.py b/paddleocr/_models/_object_detection.py index f7615d9f8f0..28082b86539 100644 --- a/paddleocr/_models/_object_detection.py +++ b/paddleocr/_models/_object_detection.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any from .._utils.cli import ( add_simple_inference_args, @@ -27,13 +31,13 @@ class ObjectDetection(PaddleXPredictorWrapper): def __init__( self, *, - img_size=None, - threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + img_size: int | None = None, + threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "img_size": img_size, "threshold": threshold, @@ -43,12 +47,12 @@ def __init__( } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class ObjectDetectionSubcommandExecutor(PredictorCLISubcommandExecutor): - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -79,9 +83,9 @@ def _update_subparser(self, subparser): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_text_detection.py b/paddleocr/_models/_text_detection.py index 22122d3a70d..3155a139f61 100644 --- a/paddleocr/_models/_text_detection.py +++ b/paddleocr/_models/_text_detection.py @@ -12,19 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + class TextDetectionMixin: def __init__( self, *, - limit_side_len=None, - limit_type=None, - thresh=None, - box_thresh=None, - unclip_ratio=None, - input_shape=None, - **kwargs, - ): + limit_side_len: int | None = None, + limit_type: str | None = None, + thresh: float | None = None, + box_thresh: float | None = None, + unclip_ratio: float | None = None, + input_shape: tuple[int, int, int] | None = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "limit_side_len": limit_side_len, "limit_type": limit_type, @@ -35,12 +40,12 @@ def __init__( } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class TextDetectionSubcommandExecutorMixin: - def _add_text_detection_args(self, subparser): + def _add_text_detection_args(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "--limit_side_len", type=int, diff --git a/paddleocr/_models/base.py b/paddleocr/_models/base.py index 2dd88bc0db9..bb2607b7d8d 100644 --- a/paddleocr/_models/base.py +++ b/paddleocr/_models/base.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any, Iterator from paddlex import create_predictor from paddlex.utils.deps import DependencyError @@ -23,18 +27,19 @@ parse_common_args, prepare_common_init_args, ) +from .._types import PredictResult -_DEFAULT_ENABLE_HPI = False +_DEFAULT_ENABLE_HPI: bool = False class PaddleXPredictorWrapper(metaclass=abc.ABCMeta): def __init__( self, *, - model_name=None, - model_dir=None, - **common_args, - ): + model_name: str | None = None, + model_dir: str | None = None, + **common_args: Any, + ) -> None: super().__init__() self._model_name = ( model_name if model_name is not None else self.default_model_name @@ -47,28 +52,28 @@ def __init__( @property @abc.abstractmethod - def default_model_name(self): + def default_model_name(self) -> str: raise NotImplementedError - def predict_iter(self, *args, **kwargs): + def predict_iter(self, *args: Any, **kwargs: Any) -> Iterator[PredictResult]: return self.paddlex_predictor.predict(*args, **kwargs) - def predict(self, *args, **kwargs): + def predict(self, *args: Any, **kwargs: Any) -> list[PredictResult]: result = list(self.predict_iter(*args, **kwargs)) return result - def close(self): + def close(self) -> None: self.paddlex_predictor.close() @classmethod @abc.abstractmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: raise NotImplementedError - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return {} - def _create_paddlex_predictor(self): + def _create_paddlex_predictor(self) -> Any: kwargs = prepare_common_init_args(self._model_name, self._common_args) kwargs = {**self._get_extra_paddlex_predictor_init_args(), **kwargs} # Should we check model names? @@ -85,10 +90,10 @@ def _create_paddlex_predictor(self): class PredictorCLISubcommandExecutor(CLISubcommandExecutor): @property @abc.abstractmethod - def subparser_name(self): + def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers): + def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) subparser.add_argument("--model_name", type=str, help="Name of the model.") @@ -103,5 +108,5 @@ def add_subparser(self, subparsers): return subparser @abc.abstractmethod - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: raise NotImplementedError diff --git a/paddleocr/_models/chart_parsing.py b/paddleocr/_models/chart_parsing.py index 0078bff813a..16d98ea6312 100644 --- a/paddleocr/_models/chart_parsing.py +++ b/paddleocr/_models/chart_parsing.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse + +from .._abstract import CLISubcommandExecutor from .._utils.cli import add_simple_inference_args +from .base import PaddleXPredictorWrapper from ._doc_vlm import ( BaseDocVLM, BaseDocVLMSubcommandExecutor, @@ -21,24 +27,24 @@ class ChartParsing(BaseDocVLM): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-Chart2Table" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return ChartParsingSubcommandExecutor() class ChartParsingSubcommandExecutor(BaseDocVLMSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "chart_parsing" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return ChartParsing - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png"}`.', diff --git a/paddleocr/_models/doc_img_orientation_classification.py b/paddleocr/_models/doc_img_orientation_classification.py index 7339dfefa70..ee216b1b7a4 100644 --- a/paddleocr/_models/doc_img_orientation_classification.py +++ b/paddleocr/_models/doc_img_orientation_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,11 +24,11 @@ class DocImgOrientationClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x1_0_doc_ori" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocImgOrientationClassificationSubcommandExecutor() @@ -32,9 +36,9 @@ class DocImgOrientationClassificationSubcommandExecutor( ImageClassificationSubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_img_orientation_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return DocImgOrientationClassification diff --git a/paddleocr/_models/doc_vlm.py b/paddleocr/_models/doc_vlm.py index b1452a3112d..52ea60096b9 100644 --- a/paddleocr/_models/doc_vlm.py +++ b/paddleocr/_models/doc_vlm.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse + +from .._abstract import CLISubcommandExecutor from .._utils.cli import add_simple_inference_args +from .base import PaddleXPredictorWrapper from ._doc_vlm import ( BaseDocVLM, BaseDocVLMSubcommandExecutor, @@ -21,24 +27,24 @@ class DocVLM(BaseDocVLM): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-DocBee2-3B" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocVLMSubcommandExecutor() class DocVLMSubcommandExecutor(BaseDocVLMSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_vlm" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return DocVLM - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', diff --git a/paddleocr/_models/formula_recognition.py b/paddleocr/_models/formula_recognition.py index 99d2ba702cb..cf9f9a33d60 100644 --- a/paddleocr/_models/formula_recognition.py +++ b/paddleocr/_models/formula_recognition.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +29,32 @@ class FormulaRecognition(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-FormulaNet_plus-M" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return FormulaRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class FormulaRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "formula_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(FormulaRecognition, params) diff --git a/paddleocr/_models/layout_detection.py b/paddleocr/_models/layout_detection.py index 14427fba8bc..991638c0c07 100644 --- a/paddleocr/_models/layout_detection.py +++ b/paddleocr/_models/layout_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._object_detection import ( ObjectDetection, ObjectDetectionSubcommandExecutor, @@ -20,19 +24,19 @@ class LayoutDetection(ObjectDetection): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-DocLayout_plus-L" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return LayoutDetectionSubcommandExecutor() class LayoutDetectionSubcommandExecutor(ObjectDetectionSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "layout_detection" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return LayoutDetection diff --git a/paddleocr/_models/seal_text_detection.py b/paddleocr/_models/seal_text_detection.py index adfff705268..d9e7f904fb6 100644 --- a/paddleocr/_models/seal_text_detection.py +++ b/paddleocr/_models/seal_text_detection.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,11 +29,11 @@ class SealTextDetection(TextDetectionMixin, PaddleXPredictorWrapper): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv4_mobile_seal_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return SealTextDetectionSubcommandExecutor() @@ -35,13 +41,13 @@ class SealTextDetectionSubcommandExecutor( TextDetectionSubcommandExecutorMixin, PredictorCLISubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "seal_text_detection" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) self._add_text_detection_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(SealTextDetection, params) diff --git a/paddleocr/_models/table_cells_detection.py b/paddleocr/_models/table_cells_detection.py index afca6ed78b0..71c825cc56d 100644 --- a/paddleocr/_models/table_cells_detection.py +++ b/paddleocr/_models/table_cells_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._object_detection import ( ObjectDetection, ObjectDetectionSubcommandExecutor, @@ -20,19 +24,19 @@ class TableCellsDetection(ObjectDetection): @property - def default_model_name(self): + def default_model_name(self) -> str: return "RT-DETR-L_wired_table_cell_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableCellsDetectionSubcommandExecutor() class TableCellsDetectionSubcommandExecutor(ObjectDetectionSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_cells_detection" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return TableCellsDetection diff --git a/paddleocr/_models/table_classification.py b/paddleocr/_models/table_classification.py index 028e8d830cd..3edc9e1da5f 100644 --- a/paddleocr/_models/table_classification.py +++ b/paddleocr/_models/table_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,19 +24,19 @@ class TableClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x1_0_table_cls" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableClassificationSubcommandExecutor() class TableClassificationSubcommandExecutor(ImageClassificationSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return TableClassification diff --git a/paddleocr/_models/table_structure_recognition.py b/paddleocr/_models/table_structure_recognition.py index 270bd7b27b3..186e6fd13ff 100644 --- a/paddleocr/_models/table_structure_recognition.py +++ b/paddleocr/_models/table_structure_recognition.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +29,32 @@ class TableStructureRecognition(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "SLANet" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableStructureRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class TableStructureRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_structure_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TableStructureRecognition, params) diff --git a/paddleocr/_models/text_detection.py b/paddleocr/_models/text_detection.py index 809dbd23240..67d77a5c53e 100644 --- a/paddleocr/_models/text_detection.py +++ b/paddleocr/_models/text_detection.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,11 +29,11 @@ class TextDetection(TextDetectionMixin, PaddleXPredictorWrapper): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv5_server_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextDetectionSubcommandExecutor() @@ -35,13 +41,13 @@ class TextDetectionSubcommandExecutor( TextDetectionSubcommandExecutorMixin, PredictorCLISubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_detection" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) self._add_text_detection_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextDetection, params) diff --git a/paddleocr/_models/text_image_unwarping.py b/paddleocr/_models/text_image_unwarping.py index 6dbbe5698cd..d374f53f18f 100644 --- a/paddleocr/_models/text_image_unwarping.py +++ b/paddleocr/_models/text_image_unwarping.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +29,32 @@ class TextImageUnwarping(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "UVDoc" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextImageUnwarpingSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class TextImageUnwarpingSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_image_unwarping" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextImageUnwarping, params) diff --git a/paddleocr/_models/text_recognition.py b/paddleocr/_models/text_recognition.py index 4f96f8e84de..5392332fe8e 100644 --- a/paddleocr/_models/text_recognition.py +++ b/paddleocr/_models/text_recognition.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -24,32 +30,32 @@ class TextRecognition(PaddleXPredictorWrapper): def __init__( self, *, - input_shape=None, - **kwargs, - ): + input_shape: tuple[int, int, int] | None = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "input_shape": input_shape, } super().__init__(**kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv5_server_rec" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> dict[str, Any]: return self._extra_init_args class TextRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( "--input_shape", @@ -59,6 +65,6 @@ def _update_subparser(self, subparser): help="Input shape of the model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextRecognition, params) diff --git a/paddleocr/_models/textline_orientation_classification.py b/paddleocr/_models/textline_orientation_classification.py index 908c0ddf165..81ef9b1f7ad 100644 --- a/paddleocr/_models/textline_orientation_classification.py +++ b/paddleocr/_models/textline_orientation_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,11 +24,11 @@ class TextLineOrientationClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x0_25_textline_ori" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextLineOrientationClassificationSubcommandExecutor() @@ -32,9 +36,9 @@ class TextLineOrientationClassificationSubcommandExecutor( ImageClassificationSubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "textline_orientation_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> type[PaddleXPredictorWrapper]: return TextLineOrientationClassification diff --git a/paddleocr/_pipelines/base.py b/paddleocr/_pipelines/base.py index f400cab41e9..7a34b8f6d89 100644 --- a/paddleocr/_pipelines/base.py +++ b/paddleocr/_pipelines/base.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc +import argparse +from typing import Any import yaml from paddlex import create_pipeline @@ -27,10 +31,10 @@ prepare_common_init_args, ) -_DEFAULT_ENABLE_HPI = None +_DEFAULT_ENABLE_HPI: bool | None = None -def _merge_dicts(d1, d2): +def _merge_dicts(d1: dict[str, Any], d2: dict[str, Any]) -> dict[str, Any]: res = d1.copy() for k, v in d2.items(): if k in res and isinstance(res[k], dict) and isinstance(v, dict): @@ -40,7 +44,7 @@ def _merge_dicts(d1, d2): return res -def _to_builtin(obj): +def _to_builtin(obj: Any) -> Any: if isinstance(obj, AttrDict): return {k: _to_builtin(v) for k, v in obj.items()} elif isinstance(obj, dict): @@ -55,9 +59,9 @@ class PaddleXPipelineWrapper(metaclass=abc.ABCMeta): def __init__( self, *, - paddlex_config=None, - **common_args, - ): + paddlex_config: str | dict[str, Any] | None = None, + **common_args: Any, + ) -> None: super().__init__() self._paddlex_config = paddlex_config self._common_args = parse_common_args( @@ -68,26 +72,26 @@ def __init__( @property @abc.abstractmethod - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: raise NotImplementedError - def export_paddlex_config_to_yaml(self, yaml_path): + def export_paddlex_config_to_yaml(self, yaml_path: str) -> None: with open(yaml_path, "w", encoding="utf-8") as f: config = _to_builtin(self._merged_paddlex_config) yaml.safe_dump(config, f) - def close(self): + def close(self) -> None: self.paddlex_pipeline.close() @classmethod @abc.abstractmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: raise NotImplementedError - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: return {} - def _get_merged_paddlex_config(self): + def _get_merged_paddlex_config(self) -> dict[str, Any]: if self._paddlex_config is None: config = load_pipeline_config(self._paddlex_pipeline_name) elif isinstance(self._paddlex_config, str): @@ -99,7 +103,7 @@ def _get_merged_paddlex_config(self): return _merge_dicts(config, overrides) - def _create_paddlex_pipeline(self): + def _create_paddlex_pipeline(self) -> Any: kwargs = prepare_common_init_args(None, self._common_args) try: return create_pipeline(config=self._merged_paddlex_config, **kwargs) @@ -112,10 +116,10 @@ def _create_paddlex_pipeline(self): class PipelineCLISubcommandExecutor(CLISubcommandExecutor): @property @abc.abstractmethod - def subparser_name(self): + def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers): + def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) add_common_cli_opts( @@ -131,5 +135,5 @@ def add_subparser(self, subparsers): return subparser @abc.abstractmethod - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: raise NotImplementedError diff --git a/paddleocr/_pipelines/doc_preprocessor.py b/paddleocr/_pipelines/doc_preprocessor.py index b8c34df3773..8404b521874 100644 --- a/paddleocr/_pipelines/doc_preprocessor.py +++ b/paddleocr/_pipelines/doc_preprocessor.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,14 +32,14 @@ class DocPreprocessor(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - **kwargs, - ): + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + **kwargs: Any, + ) -> None: self._params = { "doc_orientation_classify_model_name": doc_orientation_classify_model_name, @@ -45,16 +52,16 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "doc_preprocessor" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -63,11 +70,11 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -77,10 +84,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocPreprocessorCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -104,10 +111,10 @@ def _get_paddlex_config_overrides(self): class DocPreprocessorCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_preprocessor" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -141,7 +148,7 @@ def _update_subparser(self, subparser): help="Whether to use text image unwarping.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(DocPreprocessor, params) diff --git a/paddleocr/_pipelines/doc_understanding.py b/paddleocr/_pipelines/doc_understanding.py index 0cde72fc97b..502cfcd6a31 100644 --- a/paddleocr/_pipelines/doc_understanding.py +++ b/paddleocr/_pipelines/doc_understanding.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + from paddlex.utils.pipeline_arguments import custom_type +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -26,11 +33,11 @@ class DocUnderstanding(PaddleXPipelineWrapper): def __init__( self, - doc_understanding_model_name=None, - doc_understanding_model_dir=None, - doc_understanding_batch_size=None, - **kwargs, - ): + doc_understanding_model_name: str | None = None, + doc_understanding_model_dir: str | None = None, + doc_understanding_batch_size: int | None = None, + **kwargs: Any, + ) -> None: self._params = { "doc_understanding_model_name": doc_understanding_model_name, @@ -40,24 +47,24 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "doc_understanding" - def predict_iter(self, input, **kwargs): + def predict_iter(self, input: InputType, **kwargs: Any) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict(input, **kwargs) def predict( self, - input, - **kwargs, - ): + input: InputType, + **kwargs: Any, + ) -> list[PredictResult]: return list(self.predict_iter(input, **kwargs)) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocUnderstandingCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubModules.DocUnderstanding.model_name": self._params[ "doc_understanding_model_name" @@ -76,10 +83,10 @@ class DocUnderstandingCLISubcommandExecutor(PipelineCLISubcommandExecutor): input_validator = staticmethod(custom_type(dict)) @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_understanding" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', @@ -101,7 +108,7 @@ def _update_subparser(self, subparser): help="Batch size for the document understanding model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) params["input"] = self.input_validator(params["input"]) perform_simple_inference(DocUnderstanding, params) diff --git a/paddleocr/_pipelines/formula_recognition.py b/paddleocr/_pipelines/formula_recognition.py index 7588f81ac80..9b6aaa30d15 100644 --- a/paddleocr/_pipelines/formula_recognition.py +++ b/paddleocr/_pipelines/formula_recognition.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,27 +32,27 @@ class FormulaRecognitionPipeline(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_orientation_classify_batch_size=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - doc_unwarping_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_detection_batch_size=None, - use_layout_detection=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - **kwargs, - ): + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_orientation_classify_batch_size: int | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + doc_unwarping_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + layout_detection_batch_size: int | None = None, + use_layout_detection: bool | None = None, + formula_recognition_model_name: str | None = None, + formula_recognition_model_dir: str | None = None, + formula_recognition_batch_size: int | None = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -54,23 +61,23 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "formula_recognition" def predict_iter( self, - input, + input: InputType, *, - use_layout_detection=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + use_layout_detection: bool | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + layout_det_res: Any = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_layout_detection=use_layout_detection, @@ -86,18 +93,18 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_layout_detection=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + use_layout_detection: bool | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + layout_det_res: Any = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -114,10 +121,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return FormulaRecognitionPipelineCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "use_layout_detection": self._params["use_layout_detection"], "SubModules.LayoutDetection.model_name": self._params[ @@ -178,10 +185,10 @@ def _get_paddlex_config_overrides(self): class FormulaRecognitionPipelineCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "formula_recognition_pipeline" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -280,6 +287,6 @@ def _update_subparser(self, subparser): help="Batch size for formula recognition.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(FormulaRecognitionPipeline, params) diff --git a/paddleocr/_pipelines/ocr.py b/paddleocr/_pipelines/ocr.py index 03f24899448..31fd1a805b4 100644 --- a/paddleocr/_pipelines/ocr.py +++ b/paddleocr/_pipelines/ocr.py @@ -12,13 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + # TODO: Should we use a third-party CLI library to auto-generate command-line # arguments from the pipeline class, to reduce boilerplate and improve # maintainability? +import argparse import sys import warnings +from typing import Any, Iterator +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -55,34 +61,34 @@ class PaddleOCR(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_det_input_shape=None, - text_rec_score_thresh=None, - return_word_box=None, - text_rec_input_shape=None, - lang=None, - ocr_version=None, - **kwargs, - ): + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + text_detection_model_name: str | None = None, + text_detection_model_dir: str | None = None, + textline_orientation_model_name: str | None = None, + textline_orientation_model_dir: str | None = None, + textline_orientation_batch_size: int | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_det_input_shape: tuple[int, int, int] | None = None, + text_rec_score_thresh: float | None = None, + return_word_box: bool | None = None, + text_rec_input_shape: tuple[int, int, int] | None = None, + lang: str | None = None, + ocr_version: str | None = None, + **kwargs: Any, + ) -> None: if ocr_version is not None and ocr_version not in _SUPPORTED_OCR_VERSIONS: raise ValueError( f"Invalid OCR version: {ocr_version}. Supported values are {_SUPPORTED_OCR_VERSIONS}." @@ -163,24 +169,24 @@ def __init__( super().__init__(**base_params) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "OCR" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - return_word_box=None, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + return_word_box: bool | None = None, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -197,19 +203,19 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - return_word_box=None, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + return_word_box: bool | None = None, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -227,14 +233,14 @@ def predict( ) @deprecated("Please use `predict` instead.") - def ocr(self, img, **kwargs): + def ocr(self, img: InputType, **kwargs: Any) -> list[PredictResult]: return self.predict(img, **kwargs) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PaddleOCRCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -305,7 +311,9 @@ def _get_paddlex_config_overrides(self): } return create_config_from_structure(STRUCTURE) - def _get_ocr_model_names(self, lang, ppocr_version): + def _get_ocr_model_names( + self, lang: str | None, ppocr_version: str | None + ) -> tuple[str | None, str | None]: LATIN_LANGS = [ "af", "az", @@ -513,10 +521,10 @@ def _get_ocr_model_names(self, lang, ppocr_version): class PaddleOCRCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "ocr" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -676,7 +684,7 @@ def _update_subparser(self, subparser): help=f"[Deprecated] Please use `--{new_name}` instead.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) for name, new_name in _DEPRECATED_PARAM_NAME_MAPPING.items(): assert name in params diff --git a/paddleocr/_pipelines/paddleocr_vl.py b/paddleocr/_pipelines/paddleocr_vl.py index 9e1fc9b0f73..1b027eb617c 100644 --- a/paddleocr/_pipelines/paddleocr_vl.py +++ b/paddleocr/_pipelines/paddleocr_vl.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -37,36 +44,36 @@ class PaddleOCRVL(PaddleXPipelineWrapper): def __init__( self, - pipeline_version=_DEFAULT_PIPELINE_VERSION, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - vl_rec_model_name=None, - vl_rec_model_dir=None, - vl_rec_backend=None, - vl_rec_server_url=None, - vl_rec_max_concurrency=None, - vl_rec_api_model_name=None, - vl_rec_api_key=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - format_block_content=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - use_queues=None, - **kwargs, - ): + pipeline_version: str = _DEFAULT_PIPELINE_VERSION, + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + vl_rec_model_name: str | None = None, + vl_rec_model_dir: str | None = None, + vl_rec_backend: str | None = None, + vl_rec_server_url: str | None = None, + vl_rec_max_concurrency: int | None = None, + vl_rec_api_model_name: str | None = None, + vl_rec_api_key: str | None = None, + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_chart_recognition: bool | None = None, + use_seal_recognition: bool | None = None, + use_ocr_for_image_block: bool | None = None, + format_block_content: bool | None = None, + merge_layout_blocks: bool | None = None, + markdown_ignore_labels: list[str] | None = None, + use_queues: bool | None = None, + **kwargs: Any, + ) -> None: if pipeline_version not in _AVAILABLE_PIPELINE_VERSIONS: raise ValueError( f"Invalid pipeline version: {pipeline_version}. Supported versions are {_AVAILABLE_PIPELINE_VERSIONS}." @@ -87,7 +94,7 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: if self.pipeline_version == "v1": return "PaddleOCR-VL" elif self.pipeline_version == "v1.5": @@ -97,33 +104,33 @@ def _paddlex_pipeline_name(self): def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_shape_mode="auto", - use_queues=None, - prompt_label=None, - format_block_content=None, - repetition_penalty=None, - temperature=None, - top_p=None, - min_pixels=None, - max_pixels=None, - max_new_tokens=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - vlm_extra_args=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_chart_recognition: bool | None = None, + use_seal_recognition: bool | None = None, + use_ocr_for_image_block: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + layout_shape_mode: str = "auto", + use_queues: bool | None = None, + prompt_label: str | None = None, + format_block_content: bool | None = None, + repetition_penalty: float | None = None, + temperature: float | None = None, + top_p: float | None = None, + min_pixels: int | None = None, + max_pixels: int | None = None, + max_new_tokens: int | None = None, + merge_layout_blocks: bool | None = None, + markdown_ignore_labels: list[str] | None = None, + vlm_extra_args: dict[str, Any] | None = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -154,33 +161,33 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_shape_mode="auto", - use_queues=None, - prompt_label=None, - format_block_content=None, - repetition_penalty=None, - temperature=None, - top_p=None, - min_pixels=None, - max_pixels=None, - max_new_tokens=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - vlm_extra_args=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_chart_recognition: bool | None = None, + use_seal_recognition: bool | None = None, + use_ocr_for_image_block: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + layout_shape_mode: str = "auto", + use_queues: bool | None = None, + prompt_label: str | None = None, + format_block_content: bool | None = None, + repetition_penalty: float | None = None, + temperature: float | None = None, + top_p: float | None = None, + min_pixels: int | None = None, + max_pixels: int | None = None, + max_new_tokens: int | None = None, + merge_layout_blocks: bool | None = None, + markdown_ignore_labels: list[str] | None = None, + vlm_extra_args: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -211,12 +218,12 @@ def predict( ) ) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: list[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) def restructure_pages( - self, res_list, merge_tables=True, relevel_titles=True, concatenate_pages=False - ): + self, res_list: list[Any], merge_tables: bool = True, relevel_titles: bool = True, concatenate_pages: bool = False + ) -> list[Any]: return list( self.paddlex_pipeline.restructure_pages( res_list, @@ -227,10 +234,10 @@ def restructure_pages( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PaddleOCRVLCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -297,10 +304,10 @@ def _get_paddlex_config_overrides(self): class PaddleOCRVLCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_parser" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -494,7 +501,7 @@ def _update_subparser(self, subparser): help="Maximum number of tokens generated by the VLM.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference( PaddleOCRVL, diff --git a/paddleocr/_pipelines/pp_chatocrv4_doc.py b/paddleocr/_pipelines/pp_chatocrv4_doc.py index c9f0f92afe8..d294885bc3a 100644 --- a/paddleocr/_pipelines/pp_chatocrv4_doc.py +++ b/paddleocr/_pipelines/pp_chatocrv4_doc.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( get_subcommand_args, str2bool, @@ -23,53 +30,53 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - table_structure_recognition_model_name=None, - table_structure_recognition_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - retriever_config=None, - mllm_chat_bot_config=None, - chat_bot_config=None, - **kwargs, - ): + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + text_detection_model_name: str | None = None, + text_detection_model_dir: str | None = None, + textline_orientation_model_name: str | None = None, + textline_orientation_model_dir: str | None = None, + textline_orientation_batch_size: int | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + table_structure_recognition_model_name: str | None = None, + table_structure_recognition_model_dir: str | None = None, + seal_text_detection_model_name: str | None = None, + seal_text_detection_model_dir: str | None = None, + seal_text_recognition_model_name: str | None = None, + seal_text_recognition_model_dir: str | None = None, + seal_text_recognition_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + retriever_config: dict[str, Any] | None = None, + mllm_chat_bot_config: dict[str, Any] | None = None, + chat_bot_config: dict[str, Any] | None = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -78,56 +85,56 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-ChatOCRv4-doc" - def save_vector(self, vector_info, save_path, retriever_config=None): + def save_vector(self, vector_info: Any, save_path: str, retriever_config: dict[str, Any] | None = None) -> Any: return self.paddlex_pipeline.save_vector( vector_info=vector_info, save_path=save_path, retriever_config=retriever_config, ) - def load_vector(self, data_path, retriever_config=None): + def load_vector(self, data_path: str, retriever_config: dict[str, Any] | None = None) -> Any: return self.paddlex_pipeline.load_vector( data_path=data_path, retriever_config=retriever_config ) - def load_visual_info_list(self, data_path): + def load_visual_info_list(self, data_path: str) -> Any: return self.paddlex_pipeline.load_visual_info_list(data_path=data_path) - def save_visual_info_list(self, visual_info, save_path): + def save_visual_info_list(self, visual_info: Any, save_path: str) -> Any: return self.paddlex_pipeline.save_visual_info_list( visual_info=visual_info, save_path=save_path ) def visual_predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.visual_predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -156,31 +163,31 @@ def visual_predict_iter( def visual_predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.visual_predict_iter( input, @@ -211,13 +218,13 @@ def visual_predict( def build_vector( self, - visual_info, + visual_info: Any, *, - min_characters=3500, - block_size=300, - flag_save_bytes_vector=False, - retriever_config=None, - ): + min_characters: int = 3500, + block_size: int = 300, + flag_save_bytes_vector: bool = False, + retriever_config: dict[str, Any] | None = None, + ) -> Any: return self.paddlex_pipeline.build_vector( visual_info, min_characters=min_characters, @@ -226,7 +233,7 @@ def build_vector( retriever_config=retriever_config, ) - def mllm_pred(self, input, key_list, *, mllm_chat_bot_config=None): + def mllm_pred(self, input: InputType, key_list: list[str], *, mllm_chat_bot_config: dict[str, Any] | None = None) -> Any: return self.paddlex_pipeline.mllm_pred( input, key_list, @@ -235,27 +242,27 @@ def mllm_pred(self, input, key_list, *, mllm_chat_bot_config=None): def chat( self, - key_list, - visual_info, + key_list: list[str], + visual_info: Any, *, - use_vector_retrieval=True, - vector_info=None, - min_characters=3500, - text_task_description=None, - text_output_format=None, - text_rules_str=None, - text_few_shot_demo_text_content=None, - text_few_shot_demo_key_value_list=None, - table_task_description=None, - table_output_format=None, - table_rules_str=None, - table_few_shot_demo_text_content=None, - table_few_shot_demo_key_value_list=None, - mllm_predict_info=None, - mllm_integration_strategy="integration", - chat_bot_config=None, - retriever_config=None, - ): + use_vector_retrieval: bool = True, + vector_info: Any = None, + min_characters: int = 3500, + text_task_description: str | None = None, + text_output_format: str | None = None, + text_rules_str: str | None = None, + text_few_shot_demo_text_content: str | None = None, + text_few_shot_demo_key_value_list: str | None = None, + table_task_description: str | None = None, + table_output_format: str | None = None, + table_rules_str: str | None = None, + table_few_shot_demo_text_content: str | None = None, + table_few_shot_demo_key_value_list: str | None = None, + mllm_predict_info: Any = None, + mllm_integration_strategy: str = "integration", + chat_bot_config: dict[str, Any] | None = None, + retriever_config: dict[str, Any] | None = None, + ) -> Any: return self.paddlex_pipeline.chat( key_list, visual_info, @@ -279,10 +286,10 @@ def chat( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPChatOCRv4DocCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.LayoutParser.SubModules.LayoutDetection.model_name": self._params[ "layout_detection_model_name" @@ -423,10 +430,10 @@ def _get_paddlex_config_overrides(self): class PPChatOCRv4DocCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_chatocrv4_doc" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "-i", "--input", @@ -681,7 +688,7 @@ def _update_subparser(self, subparser): help="Configuration for the multimodal large language model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) input = params.pop("input") keys = params.pop("keys") diff --git a/paddleocr/_pipelines/pp_doctranslation.py b/paddleocr/_pipelines/pp_doctranslation.py index 22b907c2e8b..14ae3716f4f 100644 --- a/paddleocr/_pipelines/pp_doctranslation.py +++ b/paddleocr/_pipelines/pp_doctranslation.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( get_subcommand_args, str2bool, @@ -24,72 +31,72 @@ class PPDocTranslation(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - chart_recognition_model_name=None, - chart_recognition_model_dir=None, - chart_recognition_batch_size=None, - region_detection_model_name=None, - region_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - table_orientation_classify_model_name=None, - table_orientation_classify_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - seal_rec_score_thresh=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - chat_bot_config=None, - **kwargs, - ): + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + chart_recognition_model_name: str | None = None, + chart_recognition_model_dir: str | None = None, + chart_recognition_batch_size: int | None = None, + region_detection_model_name: str | None = None, + region_detection_model_dir: str | None = None, + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + text_detection_model_name: str | None = None, + text_detection_model_dir: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + textline_orientation_model_name: str | None = None, + textline_orientation_model_dir: str | None = None, + textline_orientation_batch_size: int | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + text_rec_score_thresh: float | None = None, + table_classification_model_name: str | None = None, + table_classification_model_dir: str | None = None, + wired_table_structure_recognition_model_name: str | None = None, + wired_table_structure_recognition_model_dir: str | None = None, + wireless_table_structure_recognition_model_name: str | None = None, + wireless_table_structure_recognition_model_dir: str | None = None, + wired_table_cells_detection_model_name: str | None = None, + wired_table_cells_detection_model_dir: str | None = None, + wireless_table_cells_detection_model_name: str | None = None, + wireless_table_cells_detection_model_dir: str | None = None, + table_orientation_classify_model_name: str | None = None, + table_orientation_classify_model_dir: str | None = None, + seal_text_detection_model_name: str | None = None, + seal_text_detection_model_dir: str | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_text_recognition_model_name: str | None = None, + seal_text_recognition_model_dir: str | None = None, + seal_text_recognition_batch_size: int | None = None, + seal_rec_score_thresh: float | None = None, + formula_recognition_model_name: str | None = None, + formula_recognition_model_dir: str | None = None, + formula_recognition_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + chat_bot_config: dict[str, Any] | None = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -98,45 +105,45 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-DocTranslation" def visual_predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.visual_predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -174,40 +181,40 @@ def visual_predict_iter( def visual_predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.visual_predict_iter( input, @@ -247,20 +254,20 @@ def visual_predict( def translate_iter( self, - ori_md_info_list, + ori_md_info_list: list[Any], *, - target_language="zh", - chunk_size=5000, - task_description=None, - output_format=None, - rules_str=None, - few_shot_demo_text_content=None, - few_shot_demo_key_value_list=None, - glossary=None, - llm_request_interval=0.0, - chat_bot_config=None, - **kwargs, - ): + target_language: str = "zh", + chunk_size: int = 5000, + task_description: str | None = None, + output_format: str | None = None, + rules_str: str | None = None, + few_shot_demo_text_content: str | None = None, + few_shot_demo_key_value_list: list[Any] | None = None, + glossary: str | None = None, + llm_request_interval: float = 0.0, + chat_bot_config: dict[str, Any] | None = None, + **kwargs: Any, + ) -> Iterator[Any]: return self.paddlex_pipeline.translate( ori_md_info_list, target_language=target_language, @@ -278,20 +285,20 @@ def translate_iter( def translate( self, - ori_md_info_list, + ori_md_info_list: list[Any], *, - target_language="zh", - chunk_size=5000, - task_description=None, - output_format=None, - rules_str=None, - few_shot_demo_text_content=None, - few_shot_demo_key_value_list=None, - glossary=None, - llm_request_interval=0.0, - chat_bot_config=None, - **kwargs, - ): + target_language: str = "zh", + chunk_size: int = 5000, + task_description: str | None = None, + output_format: str | None = None, + rules_str: str | None = None, + few_shot_demo_text_content: str | None = None, + few_shot_demo_key_value_list: list[Any] | None = None, + glossary: str | None = None, + llm_request_interval: float = 0.0, + chat_bot_config: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Any]: return list( self.translate_iter( ori_md_info_list, @@ -309,17 +316,17 @@ def translate( ) ) - def load_from_markdown(self, input): + def load_from_markdown(self, input: InputType) -> Any: return self.paddlex_pipeline.load_from_markdown(input) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: list[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPDocTranslationCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: # HACK: We should consider reducing duplication. STRUCTURE = { "SubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ @@ -561,10 +568,10 @@ def _get_paddlex_config_overrides(self): class PPDocTranslationCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_doctranslation" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "-i", "--input", @@ -907,7 +914,7 @@ def _update_subparser(self, subparser): help="Configuration for the embedding model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) input = params.pop("input") target_language = params.pop("target_language") @@ -939,7 +946,7 @@ def execute_with_args(self, args): target_language=target_language, ) - for res in result_translate: - res.print() + for trans_res in result_translate: + trans_res.print() if save_path: - res.save_to_markdown(save_path) + trans_res.save_to_markdown(save_path) diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py index eefe7d1c851..e1b57092ca4 100644 --- a/paddleocr/_pipelines/pp_structurev3.py +++ b/paddleocr/_pipelines/pp_structurev3.py @@ -12,7 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse import warnings +from typing import Any, Iterator, Sequence + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -31,75 +38,75 @@ class PPStructureV3(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - chart_recognition_model_name=None, - chart_recognition_model_dir=None, - chart_recognition_batch_size=None, - region_detection_model_name=None, - region_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - table_orientation_classify_model_name=None, - table_orientation_classify_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - seal_rec_score_thresh=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - markdown_ignore_labels=None, - lang=None, - ocr_version=None, - **kwargs, - ): + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + chart_recognition_model_name: str | None = None, + chart_recognition_model_dir: str | None = None, + chart_recognition_batch_size: int | None = None, + region_detection_model_name: str | None = None, + region_detection_model_dir: str | None = None, + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + text_detection_model_name: str | None = None, + text_detection_model_dir: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + textline_orientation_model_name: str | None = None, + textline_orientation_model_dir: str | None = None, + textline_orientation_batch_size: int | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + text_rec_score_thresh: float | None = None, + table_classification_model_name: str | None = None, + table_classification_model_dir: str | None = None, + wired_table_structure_recognition_model_name: str | None = None, + wired_table_structure_recognition_model_dir: str | None = None, + wireless_table_structure_recognition_model_name: str | None = None, + wireless_table_structure_recognition_model_dir: str | None = None, + wired_table_cells_detection_model_name: str | None = None, + wired_table_cells_detection_model_dir: str | None = None, + wireless_table_cells_detection_model_name: str | None = None, + wireless_table_cells_detection_model_dir: str | None = None, + table_orientation_classify_model_name: str | None = None, + table_orientation_classify_model_dir: str | None = None, + seal_text_detection_model_name: str | None = None, + seal_text_detection_model_dir: str | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_text_recognition_model_name: str | None = None, + seal_text_recognition_model_dir: str | None = None, + seal_text_recognition_batch_size: int | None = None, + seal_rec_score_thresh: float | None = None, + formula_recognition_model_name: str | None = None, + formula_recognition_model_dir: str | None = None, + formula_recognition_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + format_block_content: bool | None = None, + markdown_ignore_labels: list[str] | None = None, + lang: str | None = None, + ocr_version: str | None = None, + **kwargs: Any, + ) -> None: if ocr_version is not None and ocr_version not in _SUPPORTED_OCR_VERSIONS: raise ValueError( f"Invalid OCR version: {ocr_version}. Supported values are {_SUPPORTED_OCR_VERSIONS}." @@ -142,47 +149,47 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-StructureV3" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - markdown_ignore_labels=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + format_block_content: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + markdown_ignore_labels: list[str] | None = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -222,42 +229,42 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - markdown_ignore_labels=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_textline_orientation: bool | None = None, + use_seal_recognition: bool | None = None, + use_table_recognition: bool | None = None, + use_formula_recognition: bool | None = None, + use_chart_recognition: bool | None = None, + use_region_detection: bool | None = None, + format_block_content: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + markdown_ignore_labels: list[str] | None = None, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -297,14 +304,14 @@ def predict( ) ) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: list[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPStructureV3CLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -527,7 +534,7 @@ def _get_paddlex_config_overrides(self): } return create_config_from_structure(STRUCTURE) - def _get_ocr_model_names(self, lang, ppocr_version): + def _get_ocr_model_names(self, lang: str | None, ppocr_version: str | None) -> tuple[str | None, str | None]: LATIN_LANGS = [ "af", "az", @@ -692,10 +699,10 @@ def _get_ocr_model_names(self, lang, ppocr_version): class PPStructureV3CLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_structurev3" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -1024,7 +1031,7 @@ def _update_subparser(self, subparser): help="List of layout labels to ignore in Markdown output.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference( PPStructureV3, diff --git a/paddleocr/_pipelines/seal_recognition.py b/paddleocr/_pipelines/seal_recognition.py index b0185316eb9..75ddc2edd2c 100644 --- a/paddleocr/_pipelines/seal_recognition.py +++ b/paddleocr/_pipelines/seal_recognition.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,32 +32,32 @@ class SealRecognition(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - layout_detection_model_name=None, - layout_detection_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + seal_text_detection_model_name: str | None = None, + seal_text_detection_model_dir: str | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + **kwargs: Any, + ) -> None: self._params = { "doc_orientation_classify_model_name": doc_orientation_classify_model_name, @@ -81,29 +88,29 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "seal_recognition" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + layout_det_res: Any = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -125,24 +132,24 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + layout_det_res: Any = None, + layout_threshold: float | None = None, + layout_nms: bool | None = None, + layout_unclip_ratio: float | None = None, + layout_merge_bboxes_mode: str | None = None, + seal_det_limit_side_len: int | None = None, + seal_det_limit_type: str | None = None, + seal_det_thresh: float | None = None, + seal_det_box_thresh: float | None = None, + seal_det_unclip_ratio: float | None = None, + seal_rec_score_thresh: float | None = None, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -165,10 +172,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return SealRecognitionCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -244,10 +251,10 @@ def _get_paddlex_config_overrides(self): class SealRecognitionCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "seal_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -371,7 +378,7 @@ def _update_subparser(self, subparser): help="Text recognition threshold. Text results with scores greater than this threshold are retained.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(SealRecognition, params) diff --git a/paddleocr/_pipelines/table_recognition_v2.py b/paddleocr/_pipelines/table_recognition_v2.py index 5cf48682256..d3f10bc1a50 100644 --- a/paddleocr/_pipelines/table_recognition_v2.py +++ b/paddleocr/_pipelines/table_recognition_v2.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse +from typing import Any, Iterator + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,39 +32,39 @@ class TableRecognitionPipelineV2(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - **kwargs, - ): + layout_detection_model_name: str | None = None, + layout_detection_model_dir: str | None = None, + table_classification_model_name: str | None = None, + table_classification_model_dir: str | None = None, + wired_table_structure_recognition_model_name: str | None = None, + wired_table_structure_recognition_model_dir: str | None = None, + wireless_table_structure_recognition_model_name: str | None = None, + wireless_table_structure_recognition_model_dir: str | None = None, + wired_table_cells_detection_model_name: str | None = None, + wired_table_cells_detection_model_dir: str | None = None, + wireless_table_cells_detection_model_name: str | None = None, + wireless_table_cells_detection_model_dir: str | None = None, + doc_orientation_classify_model_name: str | None = None, + doc_orientation_classify_model_dir: str | None = None, + doc_unwarping_model_name: str | None = None, + doc_unwarping_model_dir: str | None = None, + text_detection_model_name: str | None = None, + text_detection_model_dir: str | None = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_recognition_model_name: str | None = None, + text_recognition_model_dir: str | None = None, + text_recognition_batch_size: int | None = None, + text_rec_score_thresh: float | None = None, + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_ocr_model: bool | None = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -66,33 +73,33 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "table_recognition_v2" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - overall_ocr_res=None, - layout_det_res=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=False, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_ocr_model: bool | None = None, + overall_ocr_res: Any = None, + layout_det_res: Any = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = False, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -118,28 +125,28 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - overall_ocr_res=None, - layout_det_res=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=False, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - **kwargs, - ): + use_doc_orientation_classify: bool | None = None, + use_doc_unwarping: bool | None = None, + use_layout_detection: bool | None = None, + use_ocr_model: bool | None = None, + overall_ocr_res: Any = None, + layout_det_res: Any = None, + text_det_limit_side_len: int | None = None, + text_det_limit_type: str | None = None, + text_det_thresh: float | None = None, + text_det_box_thresh: float | None = None, + text_det_unclip_ratio: float | None = None, + text_rec_score_thresh: float | None = None, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = False, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + **kwargs: Any, + ) -> list[PredictResult]: return list( self.predict_iter( input, @@ -166,10 +173,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableRecognitionPipelineV2CLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -268,10 +275,10 @@ def _get_paddlex_config_overrides(self): class TableRecognitionPipelineV2CLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_recognition_v2" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -433,6 +440,6 @@ def _update_subparser(self, subparser): help="Whether to use OCR models.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TableRecognitionPipelineV2, params) diff --git a/paddleocr/_pipelines/utils.py b/paddleocr/_pipelines/utils.py index 9a7b4bae993..1385bc67b3d 100644 --- a/paddleocr/_pipelines/utils.py +++ b/paddleocr/_pipelines/utils.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations -def create_config_from_structure(structure, *, unset=None, config=None): +from typing import Any + + +def create_config_from_structure(structure: dict[str, Any], *, unset: Any = None, config: dict[str, Any] | None = None) -> dict[str, Any]: if config is None: config = {} for k, v in structure.items(): diff --git a/paddleocr/_types.py b/paddleocr/_types.py new file mode 100644 index 00000000000..1ddf3aebe52 --- /dev/null +++ b/paddleocr/_types.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from os import PathLike +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Sequence, Union + +import numpy as np + +if TYPE_CHECKING: + from PIL import Image as PILImage + +# Input types +ImageInput = Union[str, "PathLike[str]", np.ndarray, "PILImage.Image"] +InputType = Union[ImageInput, Sequence[ImageInput]] + +# Prediction result (Phase 3 will refine with TypedDict) +PredictResult = Dict[str, Any] diff --git a/paddleocr/_utils/cli.py b/paddleocr/_utils/cli.py index 6d218756c92..5ad29a4bdb2 100644 --- a/paddleocr/_utils/cli.py +++ b/paddleocr/_utils/cli.py @@ -12,23 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import argparse import time +from typing import Any, Set from .logging import logger -def str2bool(v, /): +def str2bool(v: str, /) -> bool: return v.lower() in ("true", "yes", "t", "y", "1") -def get_subcommand_args(args): - args = vars(args).copy() - args.pop("subcommand") - args.pop("executor") - return args +def get_subcommand_args(args: argparse.Namespace) -> dict[str, Any]: + args_dict = vars(args).copy() + args_dict.pop("subcommand") + args_dict.pop("executor") + return args_dict -def add_simple_inference_args(subparser, *, input_help=None): +def add_simple_inference_args(subparser: argparse.ArgumentParser, *, input_help: str | None = None) -> None: if input_help is None: input_help = "Input path or URL." subparser.add_argument( @@ -45,14 +49,14 @@ def add_simple_inference_args(subparser, *, input_help=None): ) -def perform_simple_inference(wrapper_cls, params, predict_param_names=None): +def perform_simple_inference(wrapper_cls: type, params: dict[str, Any], predict_param_names: set[str] | None = None) -> None: params = params.copy() input_ = params.pop("input") save_path = params.pop("save_path") if predict_param_names is not None: - predict_params = {} + predict_params: dict[str, Any] = {} for name in predict_param_names: predict_params[name] = params.pop(name) else: diff --git a/paddleocr/_utils/deprecation.py b/paddleocr/_utils/deprecation.py index b30419db49c..a63e1ae07ac 100644 --- a/paddleocr/_utils/deprecation.py +++ b/paddleocr/_utils/deprecation.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import argparse import sys import warnings +from typing import Any, Sequence from typing_extensions import deprecated as deprecated @@ -24,7 +27,13 @@ class CLIDeprecationWarning(DeprecationWarning): class DeprecatedOptionAction(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: str | Sequence[Any] | None, + option_string: str | None = None, + ) -> None: assert option_string warnings.warn( f"The option `{option_string}` has been deprecated and will be removed in the future. Please refer to the documentation for more details.", @@ -33,7 +42,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) -def warn_deprecated_param(name, new_name=None): +def warn_deprecated_param(name: str, new_name: str | None = None) -> None: msg = ( f"The parameter `{name}` has been deprecated and will be removed in the future." ) diff --git a/paddleocr/_utils/logging.py b/paddleocr/_utils/logging.py index 0ef89395db2..097bb91ba93 100644 --- a/paddleocr/_utils/logging.py +++ b/paddleocr/_utils/logging.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import logging from .._env import DISABLE_AUTO_LOGGING_CONFIG -LOGGER_NAME = "paddleocr" +LOGGER_NAME: str = "paddleocr" -logger = logging.getLogger(LOGGER_NAME) +logger: logging.Logger = logging.getLogger(LOGGER_NAME) -def _set_up_logger(): +def _set_up_logger() -> None: if DISABLE_AUTO_LOGGING_CONFIG: return diff --git a/paddleocr/py.typed b/paddleocr/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/pyproject.toml b/pyproject.toml index 2aa826d67f9..d9ca17553ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,3 +73,14 @@ markers = [ "resource_intensive: mark a test as resource intensive" ] addopts = "-m 'not resource_intensive'" + +[tool.mypy] +python_version = "3.8" +packages = ["paddleocr"] +warn_return_any = false +warn_unused_configs = true +disallow_untyped_defs = false + +[[tool.mypy.overrides]] +module = ["paddlex.*", "paddle.*", "yaml.*", "requests.*"] +ignore_missing_imports = true From 53ba8b455065a40c6f09c6b4f00cabe5e7a2977a Mon Sep 17 00:00:00 2001 From: Bvicii Date: Fri, 13 Mar 2026 01:28:00 -0400 Subject: [PATCH 2/4] style: apply black formatting to type-annotated files --- paddleocr/_abstract.py | 4 +++- paddleocr/_common_args.py | 15 ++++++++++++--- paddleocr/_models/base.py | 4 +++- paddleocr/_pipelines/base.py | 4 +++- paddleocr/_pipelines/paddleocr_vl.py | 6 +++++- paddleocr/_pipelines/pp_chatocrv4_doc.py | 19 ++++++++++++++++--- paddleocr/_pipelines/pp_structurev3.py | 4 +++- paddleocr/_pipelines/utils.py | 7 ++++++- paddleocr/_utils/cli.py | 10 ++++++++-- 9 files changed, 59 insertions(+), 14 deletions(-) diff --git a/paddleocr/_abstract.py b/paddleocr/_abstract.py index beb2b7354a1..8721f788ca7 100644 --- a/paddleocr/_abstract.py +++ b/paddleocr/_abstract.py @@ -21,7 +21,9 @@ class CLISubcommandExecutor(metaclass=abc.ABCMeta): @abc.abstractmethod - def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: raise NotImplementedError @abc.abstractmethod diff --git a/paddleocr/_common_args.py b/paddleocr/_common_args.py index 7db35eec4c3..0e7445393b6 100644 --- a/paddleocr/_common_args.py +++ b/paddleocr/_common_args.py @@ -33,7 +33,9 @@ from ._utils.cli import str2bool -def parse_common_args(kwargs: dict[str, Any], *, default_enable_hpi: bool | None) -> dict[str, Any]: +def parse_common_args( + kwargs: dict[str, Any], *, default_enable_hpi: bool | None +) -> dict[str, Any]: default_vals = { "device": DEFAULT_DEVICE, "enable_hpi": default_enable_hpi, @@ -62,7 +64,9 @@ def parse_common_args(kwargs: dict[str, Any], *, default_enable_hpi: bool | None return kwargs -def prepare_common_init_args(model_name: str | None, common_args: dict[str, Any]) -> dict[str, Any]: +def prepare_common_init_args( + model_name: str | None, common_args: dict[str, Any] +) -> dict[str, Any]: device = common_args["device"] if device is None: device = get_default_device() @@ -99,7 +103,12 @@ def prepare_common_init_args(model_name: str | None, common_args: dict[str, Any] return init_kwargs -def add_common_cli_opts(parser: argparse.ArgumentParser, *, default_enable_hpi: bool | None, allow_multiple_devices: bool) -> None: +def add_common_cli_opts( + parser: argparse.ArgumentParser, + *, + default_enable_hpi: bool | None, + allow_multiple_devices: bool, +) -> None: if allow_multiple_devices: help_ = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used." else: diff --git a/paddleocr/_models/base.py b/paddleocr/_models/base.py index bb2607b7d8d..251dd0acbf7 100644 --- a/paddleocr/_models/base.py +++ b/paddleocr/_models/base.py @@ -93,7 +93,9 @@ class PredictorCLISubcommandExecutor(CLISubcommandExecutor): def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) subparser.add_argument("--model_name", type=str, help="Name of the model.") diff --git a/paddleocr/_pipelines/base.py b/paddleocr/_pipelines/base.py index 7a34b8f6d89..d512aeeca58 100644 --- a/paddleocr/_pipelines/base.py +++ b/paddleocr/_pipelines/base.py @@ -119,7 +119,9 @@ class PipelineCLISubcommandExecutor(CLISubcommandExecutor): def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) add_common_cli_opts( diff --git a/paddleocr/_pipelines/paddleocr_vl.py b/paddleocr/_pipelines/paddleocr_vl.py index 1b027eb617c..27030307473 100644 --- a/paddleocr/_pipelines/paddleocr_vl.py +++ b/paddleocr/_pipelines/paddleocr_vl.py @@ -222,7 +222,11 @@ def concatenate_markdown_pages(self, markdown_list: list[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) def restructure_pages( - self, res_list: list[Any], merge_tables: bool = True, relevel_titles: bool = True, concatenate_pages: bool = False + self, + res_list: list[Any], + merge_tables: bool = True, + relevel_titles: bool = True, + concatenate_pages: bool = False, ) -> list[Any]: return list( self.paddlex_pipeline.restructure_pages( diff --git a/paddleocr/_pipelines/pp_chatocrv4_doc.py b/paddleocr/_pipelines/pp_chatocrv4_doc.py index d294885bc3a..cd19eaa6d8b 100644 --- a/paddleocr/_pipelines/pp_chatocrv4_doc.py +++ b/paddleocr/_pipelines/pp_chatocrv4_doc.py @@ -88,14 +88,21 @@ def __init__( def _paddlex_pipeline_name(self) -> str: return "PP-ChatOCRv4-doc" - def save_vector(self, vector_info: Any, save_path: str, retriever_config: dict[str, Any] | None = None) -> Any: + def save_vector( + self, + vector_info: Any, + save_path: str, + retriever_config: dict[str, Any] | None = None, + ) -> Any: return self.paddlex_pipeline.save_vector( vector_info=vector_info, save_path=save_path, retriever_config=retriever_config, ) - def load_vector(self, data_path: str, retriever_config: dict[str, Any] | None = None) -> Any: + def load_vector( + self, data_path: str, retriever_config: dict[str, Any] | None = None + ) -> Any: return self.paddlex_pipeline.load_vector( data_path=data_path, retriever_config=retriever_config ) @@ -233,7 +240,13 @@ def build_vector( retriever_config=retriever_config, ) - def mllm_pred(self, input: InputType, key_list: list[str], *, mllm_chat_bot_config: dict[str, Any] | None = None) -> Any: + def mllm_pred( + self, + input: InputType, + key_list: list[str], + *, + mllm_chat_bot_config: dict[str, Any] | None = None, + ) -> Any: return self.paddlex_pipeline.mllm_pred( input, key_list, diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py index e1b57092ca4..14478bda10d 100644 --- a/paddleocr/_pipelines/pp_structurev3.py +++ b/paddleocr/_pipelines/pp_structurev3.py @@ -534,7 +534,9 @@ def _get_paddlex_config_overrides(self) -> dict[str, Any]: } return create_config_from_structure(STRUCTURE) - def _get_ocr_model_names(self, lang: str | None, ppocr_version: str | None) -> tuple[str | None, str | None]: + def _get_ocr_model_names( + self, lang: str | None, ppocr_version: str | None + ) -> tuple[str | None, str | None]: LATIN_LANGS = [ "af", "az", diff --git a/paddleocr/_pipelines/utils.py b/paddleocr/_pipelines/utils.py index 1385bc67b3d..100953f62c0 100644 --- a/paddleocr/_pipelines/utils.py +++ b/paddleocr/_pipelines/utils.py @@ -17,7 +17,12 @@ from typing import Any -def create_config_from_structure(structure: dict[str, Any], *, unset: Any = None, config: dict[str, Any] | None = None) -> dict[str, Any]: +def create_config_from_structure( + structure: dict[str, Any], + *, + unset: Any = None, + config: dict[str, Any] | None = None, +) -> dict[str, Any]: if config is None: config = {} for k, v in structure.items(): diff --git a/paddleocr/_utils/cli.py b/paddleocr/_utils/cli.py index 5ad29a4bdb2..ea775accc21 100644 --- a/paddleocr/_utils/cli.py +++ b/paddleocr/_utils/cli.py @@ -32,7 +32,9 @@ def get_subcommand_args(args: argparse.Namespace) -> dict[str, Any]: return args_dict -def add_simple_inference_args(subparser: argparse.ArgumentParser, *, input_help: str | None = None) -> None: +def add_simple_inference_args( + subparser: argparse.ArgumentParser, *, input_help: str | None = None +) -> None: if input_help is None: input_help = "Input path or URL." subparser.add_argument( @@ -49,7 +51,11 @@ def add_simple_inference_args(subparser: argparse.ArgumentParser, *, input_help: ) -def perform_simple_inference(wrapper_cls: type, params: dict[str, Any], predict_param_names: set[str] | None = None) -> None: +def perform_simple_inference( + wrapper_cls: type, + params: dict[str, Any], + predict_param_names: set[str] | None = None, +) -> None: params = params.copy() input_ = params.pop("input") From a96572ac9797a5d90cdec91f4af059af2a3c6d00 Mon Sep 17 00:00:00 2001 From: Bvicii Date: Fri, 13 Mar 2026 01:35:35 -0400 Subject: [PATCH 3/4] fix: update mypy config for CI compatibility Bump python_version to 3.9 (mypy dropped 3.8 support) and add PIL to ignore_missing_imports. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d9ca17553ce..a706ef54c5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,12 +75,12 @@ markers = [ addopts = "-m 'not resource_intensive'" [tool.mypy] -python_version = "3.8" +python_version = "3.9" packages = ["paddleocr"] warn_return_any = false warn_unused_configs = true disallow_untyped_defs = false [[tool.mypy.overrides]] -module = ["paddlex.*", "paddle.*", "yaml.*", "requests.*"] +module = ["paddlex.*", "paddle.*", "yaml.*", "requests.*", "PIL.*"] ignore_missing_imports = true From 1f69d6ad5fb1c8c8c3a26f172ecdec1b338ec192 Mon Sep 17 00:00:00 2001 From: Bvicii Date: Sat, 14 Mar 2026 00:39:04 -0400 Subject: [PATCH 4/4] fix: align type hints with paddlex signatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Widen layout param types (threshold, unclip_ratio, merge_bboxes_mode) to match paddlex per-pipeline signatures - Fix DocUnderstanding.predict input type: InputType → dict - Remove dict from layout_unclip_ratio in formula/seal pipelines where paddlex doesn't accept it - Use bare dict/tuple to match paddlex exactly, no refinement --- paddleocr/_models/_object_detection.py | 8 ++++---- paddleocr/_models/_text_detection.py | 2 +- paddleocr/_models/text_recognition.py | 2 +- paddleocr/_pipelines/doc_understanding.py | 6 +++--- paddleocr/_pipelines/formula_recognition.py | 12 ++++++------ paddleocr/_pipelines/ocr.py | 4 ++-- paddleocr/_pipelines/paddleocr_vl.py | 12 ++++++------ paddleocr/_pipelines/pp_chatocrv4_doc.py | 12 ++++++------ paddleocr/_pipelines/pp_doctranslation.py | 12 ++++++------ paddleocr/_pipelines/pp_structurev3.py | 12 ++++++------ paddleocr/_pipelines/seal_recognition.py | 12 ++++++------ 11 files changed, 47 insertions(+), 47 deletions(-) diff --git a/paddleocr/_models/_object_detection.py b/paddleocr/_models/_object_detection.py index 28082b86539..1b1808408d1 100644 --- a/paddleocr/_models/_object_detection.py +++ b/paddleocr/_models/_object_detection.py @@ -31,11 +31,11 @@ class ObjectDetection(PaddleXPredictorWrapper): def __init__( self, *, - img_size: int | None = None, - threshold: float | None = None, + img_size: int | tuple[int, int] | None = None, + threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, - layout_merge_bboxes_mode: str | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, + layout_merge_bboxes_mode: str | dict | None = None, **kwargs: Any, ) -> None: self._extra_init_args = { diff --git a/paddleocr/_models/_text_detection.py b/paddleocr/_models/_text_detection.py index 3155a139f61..2c2bb20b0ca 100644 --- a/paddleocr/_models/_text_detection.py +++ b/paddleocr/_models/_text_detection.py @@ -27,7 +27,7 @@ def __init__( thresh: float | None = None, box_thresh: float | None = None, unclip_ratio: float | None = None, - input_shape: tuple[int, int, int] | None = None, + input_shape: tuple | None = None, **kwargs: Any, ) -> None: self._extra_init_args = { diff --git a/paddleocr/_models/text_recognition.py b/paddleocr/_models/text_recognition.py index 5392332fe8e..7a56712e6cf 100644 --- a/paddleocr/_models/text_recognition.py +++ b/paddleocr/_models/text_recognition.py @@ -30,7 +30,7 @@ class TextRecognition(PaddleXPredictorWrapper): def __init__( self, *, - input_shape: tuple[int, int, int] | None = None, + input_shape: tuple | None = None, **kwargs: Any, ) -> None: self._extra_init_args = { diff --git a/paddleocr/_pipelines/doc_understanding.py b/paddleocr/_pipelines/doc_understanding.py index 502cfcd6a31..e61e2efb014 100644 --- a/paddleocr/_pipelines/doc_understanding.py +++ b/paddleocr/_pipelines/doc_understanding.py @@ -20,7 +20,7 @@ from paddlex.utils.pipeline_arguments import custom_type from .._abstract import CLISubcommandExecutor -from .._types import InputType, PredictResult +from .._types import PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -50,12 +50,12 @@ def __init__( def _paddlex_pipeline_name(self) -> str: return "doc_understanding" - def predict_iter(self, input: InputType, **kwargs: Any) -> Iterator[PredictResult]: + def predict_iter(self, input: dict, **kwargs: Any) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict(input, **kwargs) def predict( self, - input: InputType, + input: dict, **kwargs: Any, ) -> list[PredictResult]: return list(self.predict_iter(input, **kwargs)) diff --git a/paddleocr/_pipelines/formula_recognition.py b/paddleocr/_pipelines/formula_recognition.py index 9b6aaa30d15..98eaba6a626 100644 --- a/paddleocr/_pipelines/formula_recognition.py +++ b/paddleocr/_pipelines/formula_recognition.py @@ -42,9 +42,9 @@ def __init__( use_doc_unwarping: bool | None = None, layout_detection_model_name: str | None = None, layout_detection_model_dir: str | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, layout_detection_batch_size: int | None = None, use_layout_detection: bool | None = None, @@ -72,9 +72,9 @@ def predict_iter( use_doc_orientation_classify: bool | None = None, use_doc_unwarping: bool | None = None, layout_det_res: Any = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, **kwargs: Any, ) -> Iterator[PredictResult]: @@ -99,9 +99,9 @@ def predict( use_doc_orientation_classify: bool | None = None, use_doc_unwarping: bool | None = None, layout_det_res: Any = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, **kwargs: Any, ) -> list[PredictResult]: diff --git a/paddleocr/_pipelines/ocr.py b/paddleocr/_pipelines/ocr.py index 31fd1a805b4..5ad132a6899 100644 --- a/paddleocr/_pipelines/ocr.py +++ b/paddleocr/_pipelines/ocr.py @@ -81,10 +81,10 @@ def __init__( text_det_thresh: float | None = None, text_det_box_thresh: float | None = None, text_det_unclip_ratio: float | None = None, - text_det_input_shape: tuple[int, int, int] | None = None, + text_det_input_shape: tuple | None = None, text_rec_score_thresh: float | None = None, return_word_box: bool | None = None, - text_rec_input_shape: tuple[int, int, int] | None = None, + text_rec_input_shape: tuple | None = None, lang: str | None = None, ocr_version: str | None = None, **kwargs: Any, diff --git a/paddleocr/_pipelines/paddleocr_vl.py b/paddleocr/_pipelines/paddleocr_vl.py index 27030307473..28b3bc84374 100644 --- a/paddleocr/_pipelines/paddleocr_vl.py +++ b/paddleocr/_pipelines/paddleocr_vl.py @@ -47,9 +47,9 @@ def __init__( pipeline_version: str = _DEFAULT_PIPELINE_VERSION, layout_detection_model_name: str | None = None, layout_detection_model_dir: str | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, vl_rec_model_name: str | None = None, vl_rec_model_dir: str | None = None, @@ -112,9 +112,9 @@ def predict_iter( use_chart_recognition: bool | None = None, use_seal_recognition: bool | None = None, use_ocr_for_image_block: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, layout_shape_mode: str = "auto", use_queues: bool | None = None, @@ -169,9 +169,9 @@ def predict( use_chart_recognition: bool | None = None, use_seal_recognition: bool | None = None, use_ocr_for_image_block: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, layout_shape_mode: str = "auto", use_queues: bool | None = None, diff --git a/paddleocr/_pipelines/pp_chatocrv4_doc.py b/paddleocr/_pipelines/pp_chatocrv4_doc.py index cd19eaa6d8b..c816961bcaa 100644 --- a/paddleocr/_pipelines/pp_chatocrv4_doc.py +++ b/paddleocr/_pipelines/pp_chatocrv4_doc.py @@ -56,9 +56,9 @@ def __init__( use_textline_orientation: bool | None = None, use_seal_recognition: bool | None = None, use_table_recognition: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, @@ -124,9 +124,9 @@ def visual_predict_iter( use_textline_orientation: bool | None = None, use_seal_recognition: bool | None = None, use_table_recognition: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, @@ -177,9 +177,9 @@ def visual_predict( use_textline_orientation: bool | None = None, use_seal_recognition: bool | None = None, use_table_recognition: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, diff --git a/paddleocr/_pipelines/pp_doctranslation.py b/paddleocr/_pipelines/pp_doctranslation.py index 14ae3716f4f..8fcdb0acc0f 100644 --- a/paddleocr/_pipelines/pp_doctranslation.py +++ b/paddleocr/_pipelines/pp_doctranslation.py @@ -33,9 +33,9 @@ def __init__( self, layout_detection_model_name: str | None = None, layout_detection_model_dir: str | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, chart_recognition_model_name: str | None = None, chart_recognition_model_dir: str | None = None, @@ -120,9 +120,9 @@ def visual_predict_iter( use_formula_recognition: bool | None = None, use_chart_recognition: bool | None = None, use_region_detection: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, @@ -191,9 +191,9 @@ def visual_predict( use_formula_recognition: bool | None = None, use_chart_recognition: bool | None = None, use_region_detection: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py index 14478bda10d..87d064a1da8 100644 --- a/paddleocr/_pipelines/pp_structurev3.py +++ b/paddleocr/_pipelines/pp_structurev3.py @@ -40,9 +40,9 @@ def __init__( self, layout_detection_model_name: str | None = None, layout_detection_model_dir: str | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, chart_recognition_model_name: str | None = None, chart_recognition_model_dir: str | None = None, @@ -165,9 +165,9 @@ def predict_iter( use_chart_recognition: bool | None = None, use_region_detection: bool | None = None, format_block_content: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, @@ -240,9 +240,9 @@ def predict( use_chart_recognition: bool | None = None, use_region_detection: bool | None = None, format_block_content: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | dict | None = None, layout_merge_bboxes_mode: str | None = None, text_det_limit_side_len: int | None = None, text_det_limit_type: str | None = None, diff --git a/paddleocr/_pipelines/seal_recognition.py b/paddleocr/_pipelines/seal_recognition.py index 75ddc2edd2c..95bc6420200 100644 --- a/paddleocr/_pipelines/seal_recognition.py +++ b/paddleocr/_pipelines/seal_recognition.py @@ -46,9 +46,9 @@ def __init__( use_doc_orientation_classify: bool | None = None, use_doc_unwarping: bool | None = None, use_layout_detection: bool | None = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, seal_det_limit_side_len: int | None = None, seal_det_limit_type: str | None = None, @@ -99,9 +99,9 @@ def predict_iter( use_doc_unwarping: bool | None = None, use_layout_detection: bool | None = None, layout_det_res: Any = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, seal_det_limit_side_len: int | None = None, seal_det_limit_type: str | None = None, @@ -138,9 +138,9 @@ def predict( use_doc_unwarping: bool | None = None, use_layout_detection: bool | None = None, layout_det_res: Any = None, - layout_threshold: float | None = None, + layout_threshold: float | dict | None = None, layout_nms: bool | None = None, - layout_unclip_ratio: float | None = None, + layout_unclip_ratio: float | tuple[float, float] | None = None, layout_merge_bboxes_mode: str | None = None, seal_det_limit_side_len: int | None = None, seal_det_limit_type: str | None = None,