Skip to content

Commit 3c76ef3

Browse files
authored
feat: add CustomSchemaNormalization (#194)
Signed-off-by: Artem Inzhyyants <[email protected]>
1 parent e78eaff commit 3c76ef3

File tree

6 files changed

+129
-16
lines changed

6 files changed

+129
-16
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,28 @@ definitions:
667667
$parameters:
668668
type: object
669669
additionalProperties: true
670+
CustomSchemaNormalization:
671+
title: Custom Schema Normalization
672+
description: Schema normalization component whose behavior is derived from a custom code implementation of the connector.
673+
type: object
674+
additionalProperties: true
675+
required:
676+
- type
677+
- class_name
678+
properties:
679+
type:
680+
type: string
681+
enum: [ CustomSchemaNormalization ]
682+
class_name:
683+
title: Class Name
684+
description: Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.
685+
type: string
686+
additionalProperties: true
687+
examples:
688+
- "source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
689+
$parameters:
690+
type: object
691+
additionalProperties: true
670692
CustomStateMigration:
671693
title: Custom State Migration
672694
description: Apply a custom transformation on the input state.
@@ -2600,7 +2622,11 @@ definitions:
26002622
- "$ref": "#/definitions/CustomRecordFilter"
26012623
- "$ref": "#/definitions/RecordFilter"
26022624
schema_normalization:
2603-
"$ref": "#/definitions/SchemaNormalization"
2625+
title: Schema Normalization
2626+
description: Responsible for normalization according to the schema.
2627+
anyOf:
2628+
- "$ref": "#/definitions/SchemaNormalization"
2629+
- "$ref": "#/definitions/CustomSchemaNormalization"
26042630
default: None
26052631
$parameters:
26062632
type: object

airbyte_cdk/sources/declarative/extractors/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
1010
ResponseToFileExtractor,
1111
)
12+
from airbyte_cdk.sources.declarative.extractors.type_transformer import TypeTransformer
1213

1314
__all__ = [
15+
"TypeTransformer",
1416
"HttpSelector",
1517
"DpathExtractor",
1618
"RecordFilter",

airbyte_cdk/sources/declarative/extractors/record_selector.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,14 @@
1010
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
1111
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
1212
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
13+
from airbyte_cdk.sources.declarative.extractors.type_transformer import (
14+
TypeTransformer as DeclarativeTypeTransformer,
15+
)
1316
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
1417
from airbyte_cdk.sources.declarative.models import SchemaNormalization
1518
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
1619
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
17-
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
18-
19-
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
20-
SchemaNormalization.None_: TransformConfig.NoTransform,
21-
SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization,
22-
}
20+
from airbyte_cdk.sources.utils.transform import TypeTransformer
2321

2422

2523
@dataclass
@@ -38,7 +36,7 @@ class RecordSelector(HttpSelector):
3836
extractor: RecordExtractor
3937
config: Config
4038
parameters: InitVar[Mapping[str, Any]]
41-
schema_normalization: TypeTransformer
39+
schema_normalization: Union[TypeTransformer, DeclarativeTypeTransformer]
4240
name: str
4341
_name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
4442
record_filter: Optional[RecordFilter] = None
airbyte_cdk/sources/declarative/extractors/type_transformer.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#
2+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from abc import ABC, abstractmethod
6+
from dataclasses import dataclass
7+
from typing import Any, Dict, Mapping
8+
9+
10+
@dataclass
class TypeTransformer(ABC):
    """
    Abstract base class for implementing type transformation logic.

    This class provides a blueprint for defining custom transformations
    on data records based on a provided schema. Implementing classes
    must override the `transform` method to specify the transformation
    logic.

    Attributes:
        None explicitly defined, as this is a dataclass intended to be
        subclassed.

    Methods:
        transform(record: Dict[str, Any], schema: Mapping[str, Any]) -> None:
            Abstract method that must be implemented by subclasses.
            It performs a transformation on a given data record based
            on the provided schema.

    Usage:
        To use this class, create a subclass that implements the
        `transform` method with the desired transformation logic.

    Raises:
        TypeError: On instantiation of this class, or of any subclass that
            does not override `transform`. This is the standard `abc`
            enforcement for abstract methods.
    """

    @abstractmethod
    def transform(
        self,
        record: Dict[str, Any],
        schema: Mapping[str, Any],
    ) -> None:
        """
        Perform a transformation on a data record based on a given schema.

        Nothing is returned, so implementations are expected to apply
        their changes to `record` itself.

        Args:
            record (Dict[str, Any]): The data record to be transformed.
            schema (Mapping[str, Any]): The schema that dictates how
                the record should be transformed.

        Returns:
            None
        """

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,22 @@ class Config:
268268
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
269269

270270

271+
class CustomSchemaNormalization(BaseModel):
272+
class Config:
273+
extra = Extra.allow
274+
275+
type: Literal["CustomSchemaNormalization"]
276+
class_name: str = Field(
277+
...,
278+
description="Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.",
279+
examples=[
280+
"source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer"
281+
],
282+
title="Class Name",
283+
)
284+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
285+
286+
271287
class CustomStateMigration(BaseModel):
272288
class Config:
273289
extra = Extra.allow
@@ -1530,7 +1546,11 @@ class RecordSelector(BaseModel):
15301546
description="Responsible for filtering records to be emitted by the Source.",
15311547
title="Record Filter",
15321548
)
1533-
schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_
1549+
schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
1550+
SchemaNormalization.None_,
1551+
description="Responsible for normalization according to the schema.",
1552+
title="Schema Normalization",
1553+
)
15341554
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
15351555

15361556

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,6 @@
8282
from airbyte_cdk.sources.declarative.extractors.record_filter import (
8383
ClientSideIncrementalRecordFilterDecorator,
8484
)
85-
from airbyte_cdk.sources.declarative.extractors.record_selector import (
86-
SCHEMA_TRANSFORMER_TYPE_MAPPING,
87-
)
8885
from airbyte_cdk.sources.declarative.incremental import (
8986
ChildPartitionResumableFullRefreshCursor,
9087
CursorFactory,
@@ -100,7 +97,9 @@
10097
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
10198
LegacyToPerPartitionStateMigration,
10299
)
103-
from airbyte_cdk.sources.declarative.models import CustomStateMigration
100+
from airbyte_cdk.sources.declarative.models import (
101+
CustomStateMigration,
102+
)
104103
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
105104
AddedFieldDefinition as AddedFieldDefinitionModel,
106105
)
@@ -185,6 +184,9 @@
185184
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
186185
CustomSchemaLoader as CustomSchemaLoader,
187186
)
187+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
188+
CustomSchemaNormalization as CustomSchemaNormalizationModel,
189+
)
188190
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
189191
CustomTransformation as CustomTransformationModel,
190192
)
@@ -311,6 +313,9 @@
311313
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
312314
ResponseToFileExtractor as ResponseToFileExtractorModel,
313315
)
316+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
317+
SchemaNormalization as SchemaNormalizationModel,
318+
)
314319
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
315320
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
316321
)
@@ -445,6 +450,11 @@
445450

446451
ComponentDefinition = Mapping[str, Any]
447452

453+
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
454+
SchemaNormalizationModel.None_: TransformConfig.NoTransform,
455+
SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
456+
}
457+
448458

449459
class ModelToComponentFactory:
450460
EPOCH_DATETIME_FORMAT = "%s"
@@ -493,6 +503,7 @@ def _init_mappings(self) -> None:
493503
CustomRequesterModel: self.create_custom_component,
494504
CustomRetrieverModel: self.create_custom_component,
495505
CustomSchemaLoader: self.create_custom_component,
506+
CustomSchemaNormalizationModel: self.create_custom_component,
496507
CustomStateMigration: self.create_custom_component,
497508
CustomPaginationStrategyModel: self.create_custom_component,
498509
CustomPartitionRouterModel: self.create_custom_component,
@@ -2000,7 +2011,6 @@ def create_record_selector(
20002011
client_side_incremental_sync: Dict[str, Any] | None = None,
20012012
**kwargs: Any,
20022013
) -> RecordSelector:
2003-
assert model.schema_normalization is not None # for mypy
20042014
extractor = self._create_component_from_model(
20052015
model=model.extractor, decoder=decoder, config=config
20062016
)
@@ -2018,8 +2028,10 @@ def create_record_selector(
20182028
else None,
20192029
**client_side_incremental_sync,
20202030
)
2021-
schema_normalization = TypeTransformer(
2022-
SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]
2031+
schema_normalization = (
2032+
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2033+
if isinstance(model.schema_normalization, SchemaNormalizationModel)
2034+
else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here
20232035
)
20242036

20252037
return RecordSelector(

0 commit comments

Comments
 (0)