Skip to content

Commit 8c6e1a4

Browse files
committed
feat: add typetransformer based on pydantic basemodel
Signed-off-by: Artem Inzhyyants <[email protected]>
1 parent e8ec233 commit 8c6e1a4

File tree

3 files changed

+187
-6
lines changed

3 files changed

+187
-6
lines changed

airbyte_cdk/sources/utils/transform.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
#
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
4-
4+
import importlib
55
import logging
6+
import sys
7+
from dataclasses import dataclass
68
from enum import Flag, auto
9+
from functools import lru_cache
10+
from pathlib import Path
11+
from tempfile import TemporaryDirectory
712
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
813

14+
from datamodel_code_generator import DataModelType, InputFileType, generate
915
from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators
16+
from pydantic import BaseModel
1017

1118
MAX_NESTING_DEPTH = 3
1219
json_to_python_simple = {
@@ -275,3 +282,33 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
275282

276283
else:
277284
return python_to_json[type(input_data)]
285+
286+
287+
@dataclass(frozen=True)
class PydanticTypeTransformer:
    """Normalize records by passing them through a generated pydantic model.

    A pydantic v2 model named ``NormalizationModel`` is generated from the
    stream's JSON schema with ``datamodel-code-generator``, imported
    dynamically, and then used to validate each record.
    """

    def stream_model(self, json_schema: str) -> type[BaseModel]:
        """Return the pydantic model class generated for *json_schema*.

        Args:
            json_schema: The stream schema serialized as text.

        Returns:
            The generated ``NormalizationModel`` class (a class, not an
            instance). Results are memoized per schema text.
        """
        return self._build_stream_model(str(json_schema))

    @staticmethod
    @lru_cache(maxsize=128)
    def _build_stream_model(json_schema: str) -> type[BaseModel]:
        """Generate, import and return the model class for *json_schema*.

        The cache lives on a static method rather than on the instance method
        so that it is keyed on the schema text only and does not keep
        transformer instances alive.

        Raises:
            ImportError: If no import spec/loader can be created for the
                generated file.
        """
        # Function-scope imports: `import importlib` alone does not guarantee
        # that the `importlib.util` submodule is loaded.
        import hashlib
        import importlib.util

        # A per-schema module name avoids clobbering an unrelated module
        # called "models" and keeps models of different schemas apart.
        digest = hashlib.sha256(json_schema.encode("utf-8")).hexdigest()[:16]
        module_name = f"_airbyte_cdk_normalization_models_{digest}"

        with TemporaryDirectory() as temporary_directory_name:
            output = Path(temporary_directory_name) / "models.py"
            generate(
                json_schema,
                input_file_type=InputFileType.Auto,
                input_filename="example.json",
                output=output,
                class_name="NormalizationModel",
                output_model_type=DataModelType.PydanticV2BaseModel,
            )

            # Load the generated models.py dynamically. This has to happen
            # while the temporary directory still exists.
            spec = importlib.util.spec_from_file_location(module_name, output)
            if spec is None or spec.loader is None:
                raise ImportError(f"Cannot load generated models from {output}")
            module = importlib.util.module_from_spec(spec)
            # Registered before execution, as in the importlib recipe for
            # importing a source file directly.
            sys.modules[module_name] = module
            try:
                spec.loader.exec_module(module)
            except BaseException:
                # Do not leave a half-initialized module registered.
                sys.modules.pop(module_name, None)
                raise

        normalization_model: type[BaseModel] = getattr(module, "NormalizationModel")
        return normalization_model

    def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]) -> None:
        """Validate *record* against *schema* and update it in place.

        Args:
            record: The record to normalize; it is updated with the dumped
                model fields.
            schema: The JSON schema describing the record.
        """
        import json

        # Serialize as real JSON: str() of a mapping yields a Python repr
        # (single quotes, None/True/False), which is not a JSON document.
        # `default=dict` lets nested non-dict mappings be serialized as well.
        model = self.stream_model(json.dumps(schema, default=dict))
        record.update(model(**record).model_dump())

poetry.lock

Lines changed: 148 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ python = ">=3.10,<3.13"
3333
airbyte-protocol-models-dataclasses = "^0.14"
3434
backoff = "*"
3535
cachetools = "*"
36+
datamodel-code-generator = "^0.28.2"
3637
dpath = "^2.1.6"
3738
dunamai = "^1.22.0"
3839
genson = "1.3.0"

0 commit comments

Comments
 (0)