From 8c6e1a4d125077bd8d93c17de8fa279d40a86691 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Thu, 6 Mar 2025 16:39:42 +0100 Subject: [PATCH 1/5] feat: add typetransformer based on pydantic basemodel Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/utils/transform.py | 39 ++++++- poetry.lock | 153 ++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 187 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index e19aad3a3..d715f4c34 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -1,12 +1,19 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +import importlib import logging +import sys +from dataclasses import dataclass from enum import Flag, auto +from functools import lru_cache +from pathlib import Path +from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast +from datamodel_code_generator import DataModelType, InputFileType, generate from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators +from pydantic import BaseModel MAX_NESTING_DEPTH = 3 json_to_python_simple = { @@ -275,3 +282,33 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: else: return python_to_json[type(input_data)] + + +@dataclass(frozen=True) +class PydanticTypeTransformer: + @lru_cache + def stream_model(self, json_schema: str) -> BaseModel: + with TemporaryDirectory() as temporary_directory_name: + temporary_directory = Path(temporary_directory_name) + output = Path(temporary_directory / "models.py") + generate( + str(json_schema), + input_file_type=InputFileType.Auto, + input_filename="example.json", + output=output, + class_name="NormalizationModel", + output_model_type=DataModelType.PydanticV2BaseModel, + ) + + # Load the generated models.py dynamically + spec = importlib.util.spec_from_file_location("models", output) + module = importlib.util.module_from_spec(spec) + sys.modules["models"] = module + spec.loader.exec_module(module) + + normalization_model = getattr(module, "NormalizationModel") + return normalization_model + + def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]) -> None: + model: BaseModel = self.stream_model(str(schema)) + record.update(model(**record).model_dump()) diff --git a/poetry.lock b/poetry.lock index 94f50c0a7..6c41bb19b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -192,6 +192,22 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] +[[package]] +name = "argcomplete" +version = "3.6.0" +description = "Bash tab completion for argparse" +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "argcomplete-3.6.0-py3-none-any.whl", hash = "sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd"}, + {file = "argcomplete-3.6.0.tar.gz", hash = "sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b"}, +] + +[package.extras] +test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] + [[package]] name = "async-timeout" version = "4.0.3" @@ -307,6 +323,54 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "black" +version = "25.1.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, + {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, + {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, + {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, + {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, + {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, + {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, + {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, + {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, + {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, + {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, + {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, + {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, + {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, + {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, + {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, + {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, + {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, + {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, + {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, + {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, + {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "bracex" version = "2.5.post1" @@ -575,7 +639,7 @@ files = [ name = "click" version = "8.1.8" description = "Composable command line interface toolkit" -optional = true +optional = false python-versions = ">=3.7" groups = ["main"] markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" @@ -972,6 +1036,38 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" +[[package]] +name = "datamodel-code-generator" +version = "0.28.2" +description = "Datamodel Code Generator" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "datamodel_code_generator-0.28.2-py3-none-any.whl", hash = "sha256:a2c425386c3f836c618ae276be57e460df323ac78f911b1b12d927ddffd70e73"}, + {file = "datamodel_code_generator-0.28.2.tar.gz", hash = "sha256:5f16fe4d6acee79c1366f9ee68016eeec544fc0a2fec25ce47d35f7b7767e0fe"}, +] + +[package.dependencies] +argcomplete = ">=2.10.1,<4" +black = ">=19.10b0" +genson = ">=1.2.1,<2" +inflect = ">=4.1,<6" +isort = ">=4.3.21,<7" +jinja2 = ">=2.10.1,<4" +packaging = "*" +pydantic = ">=1.5" +pyyaml = ">=6.0.1" +tomli = {version = ">=2.2.1,<3", markers = "python_version <= \"3.11\""} + +[package.extras] +all = ["graphql-core (>=3.2.3)", "httpx (>=0.24.1)", "openapi-spec-validator (>=0.2.8,<0.7)", "prance (>=0.18.2)", "pysnooper (>=0.4.1,<2)"] +debug = ["pysnooper (>=0.4.1,<2)"] +graphql = ["graphql-core (>=3.2.3)"] +http = ["httpx (>=0.24.1)"] +validation = ["openapi-spec-validator (>=0.2.8,<0.7)", "prance (>=0.18.2)"] + [[package]] name = "dpath" version = "2.2.0" @@ -1513,6 +1609,23 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link perf = ["ipython"] testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] +[[package]] +name = "inflect" +version = "5.6.2" +description = "Correctly generate plurals, singular nouns, ordinals, indefinite articles; convert numbers to words" +optional = false +python-versions = ">=3.7" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "inflect-5.6.2-py3-none-any.whl", hash = "sha256:b45d91a4a28a4e617ff1821117439b06eaa86e2a4573154af0149e9be6687238"}, + {file = "inflect-5.6.2.tar.gz", hash = "sha256:aadc7ed73928f5e014129794bbac03058cca35d0a973a5fc4eb45c7fa26005f9"}, +] + +[package.extras] +docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] +testing = ["pygments", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1542,6 +1655,23 @@ files = [ [package.dependencies] six = "*" +[[package]] +name = "isort" +version = "6.0.1" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.9.0" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"}, + {file = "isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450"}, +] + +[package.extras] +colors = ["colorama"] +plugins = ["setuptools"] + [[package]] name = "jinja2" version = "3.1.5" @@ -2546,11 +2676,11 @@ description = "Type system extensions for programs checked with the mypy type ch optional = false python-versions = ">=3.5" groups = ["main", "dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -markers = {main = "(extra == \"vector-db-based\" or extra == \"file-based\") and (python_version <= \"3.11\" or python_version >= \"3.12\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "nltk" @@ -2883,6 +3013,19 @@ files = [ {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, ] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "pdf2image" version = "1.16.3" @@ -4824,8 +4967,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +groups = ["main", "dev"] files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -4860,6 +5002,7 @@ files = [ {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +markers = {main = "python_version <= \"3.11\"", dev = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "tqdm" @@ -5418,4 +5561,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "dc50be30e74dea64afa594eb2267a0b782cb5cac11ae7f5c52b09269982b3ffd" +content-hash = "e5cb4da367c18c722bd03ab98ad8d2075af4f388527a05d24c3626e69f420adf" diff --git a/pyproject.toml b/pyproject.toml index 7f8201692..d4a0c4570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ python = ">=3.10,<3.13" airbyte-protocol-models-dataclasses = "^0.14" backoff = "*" cachetools = "*" +datamodel-code-generator = "^0.28.2" dpath = "^2.1.6" dunamai = "^1.22.0" genson = "1.3.0" From d29b488fbfd60f815adae4a8fa2810c684f00c97 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 10 Mar 2025 11:33:09 +0100 Subject: [PATCH 2/5] add: _custom_model_normalizer for PydanticTypeTransformer Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/utils/transform.py | 97 ++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 5 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index d715f4c34..bf2457bda 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -4,12 +4,11 @@ import importlib import logging import sys -from dataclasses import dataclass from enum import Flag, auto from functools import lru_cache from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast +from typing import Any, Callable, Dict, Generator, Mapping, Optional, Type, cast from datamodel_code_generator import DataModelType, InputFileType, generate from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators @@ -284,10 +283,78 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: return python_to_json[type(input_data)] -@dataclass(frozen=True) class PydanticTypeTransformer: + """ + A utility class for dynamically generating Pydantic models from JSON schemas + and transforming records using these models. + + This class allows users to generate a Pydantic model from a given JSON schema + at runtime, apply optional modifications to the generated model, and use it + to transform input records by ensuring they conform to the schema. + + Features: + - Dynamically generates Pydantic models from JSON schemas. + - Allows users to register a custom model normalizer for modifying the generated models. + - Provides a caching mechanism for model generation to improve performance. + - Supports transformation of input records to ensure schema compliance. + + Attributes: + _custom_model_normalizer (Callable[[Type[BaseModel]], Type[BaseModel]]): + An optional callback function that allows users to modify the generated model. + + Methods: + register_custom_model(custom_model_callback): + Registers a custom callback function to modify the generated model. + + stream_model(json_schema, model_modifier): + Generates a Pydantic model from a JSON schema and applies an optional model modification function. + + transform(record, schema): + Transforms a given record using a dynamically generated Pydantic model. + """ + + _custom_model_normalizer: Callable[[Type[BaseModel]], Type[BaseModel]] = None + + def register_custom_model( + self, custom_model_callback: Callable[[Type[BaseModel]], Type[BaseModel]] + ) -> Callable[[Type[BaseModel]], Type[BaseModel]]: + """ + Registers a custom normalization callback to modify the dynamically generated model. + + This function allows users to define a transformation function that modifies + the generated Pydantic model before it is used for data validation. + + Args: + custom_model_callback (Callable[[Type[BaseModel]], Type[BaseModel]]): + A function that takes a generated Pydantic model as input and returns a modified version. + + Returns: + Callable[[Type[BaseModel]], Type[BaseModel]]: The registered callback function. + """ + + self._custom_model_normalizer = custom_model_callback + return custom_model_callback + @lru_cache - def stream_model(self, json_schema: str) -> BaseModel: + def stream_model( + self, + json_schema: str, + model_modifier: Optional[Callable[[BaseModel], BaseModel]] = None, + ) -> BaseModel: + """ + Generates a Pydantic model from a given JSON schema. + + This method dynamically creates a Pydantic model using the provided JSON schema. + Optionally, a model modification function can be applied to customize the generated model. + + Args: + json_schema (str): The JSON schema definition as a string. + model_modifier (Optional[Callable[[Type[BaseModel]], Type[BaseModel]]], optional): + A function that modifies the generated model before returning it. + + Returns: + Type[BaseModel]: The generated Pydantic model with optional modifications. + """ with TemporaryDirectory() as temporary_directory_name: temporary_directory = Path(temporary_directory_name) output = Path(temporary_directory / "models.py") @@ -307,8 +374,28 @@ def stream_model(self, json_schema: str) -> BaseModel: spec.loader.exec_module(module) normalization_model = getattr(module, "NormalizationModel") - return normalization_model + + # Apply the user-provided modifier + if model_modifier: + return model_modifier(normalization_model) + + return normalization_model def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]) -> None: + """ + Transforms a given record using a dynamically generated Pydantic model. + + This method ensures that the input record conforms to the provided schema by: + - Generating a Pydantic model from the schema. + - Validating and normalizing the record based on the generated model. + - Updating the record with the validated and transformed values. + + Args: + record (Dict[str, Any]): The data record to be transformed. + schema (Mapping[str, Any]): The schema that defines the expected structure of the record. + + Returns: + None: The function updates the `record` in place. + """ model: BaseModel = self.stream_model(str(schema)) record.update(model(**record).model_dump()) From 94576b133230951b2be2fcaf83149f7a1d4ac2c6 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 10 Mar 2025 12:06:40 +0100 Subject: [PATCH 3/5] fix: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/utils/transform.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index bf2457bda..7ce99b818 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -313,7 +313,7 @@ class PydanticTypeTransformer: Transforms a given record using a dynamically generated Pydantic model. """ - _custom_model_normalizer: Callable[[Type[BaseModel]], Type[BaseModel]] = None + _custom_model_normalizer: Optional[Callable[[Type[BaseModel]], Type[BaseModel]]] = None def register_custom_model( self, custom_model_callback: Callable[[Type[BaseModel]], Type[BaseModel]] @@ -339,8 +339,8 @@ def register_custom_model( def stream_model( self, json_schema: str, - model_modifier: Optional[Callable[[BaseModel], BaseModel]] = None, - ) -> BaseModel: + model_modifier: Optional[Callable[[Type[BaseModel]], Type[BaseModel]]] = None, + ) -> Type[BaseModel]: """ Generates a Pydantic model from a given JSON schema. @@ -373,7 +373,7 @@ def stream_model( sys.modules["models"] = module spec.loader.exec_module(module) - normalization_model = getattr(module, "NormalizationModel") + normalization_model: Type[BaseModel] = getattr(module, "NormalizationModel") # Apply the user-provided modifier if model_modifier: @@ -397,5 +397,5 @@ def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]) -> None: Returns: None: The function updates the `record` in place. """ - model: BaseModel = self.stream_model(str(schema)) + model: Type[BaseModel] = self.stream_model(str(schema)) record.update(model(**record).model_dump()) From 8ec18017b7f5c61d0ec526d3a85fc7c5a4969401 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 10 Mar 2025 12:14:35 +0100 Subject: [PATCH 4/5] fix: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/utils/transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 7ce99b818..b86898844 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -369,9 +369,9 @@ def stream_model( # Load the generated models.py dynamically spec = importlib.util.spec_from_file_location("models", output) - module = importlib.util.module_from_spec(spec) + module = importlib.util.module_from_spec(spec) # type: ignore sys.modules["models"] = module - spec.loader.exec_module(module) + spec.loader.exec_module(module) # type: ignore normalization_model: Type[BaseModel] = getattr(module, "NormalizationModel") From c233a6601d0a74b32123c3bbc62ebbbc4574eae8 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 10 Mar 2025 12:14:46 +0100 Subject: [PATCH 5/5] fix: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/utils/transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index b86898844..429018fba 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -369,9 +369,9 @@ def stream_model( # Load the generated models.py dynamically spec = importlib.util.spec_from_file_location("models", output) - module = importlib.util.module_from_spec(spec) # type: ignore + module = importlib.util.module_from_spec(spec) # type: ignore sys.modules["models"] = module - spec.loader.exec_module(module) # type: ignore + spec.loader.exec_module(module) # type: ignore normalization_model: Type[BaseModel] = getattr(module, "NormalizationModel")