Skip to content

Commit 1953fba

Browse files
committed
Added items handling to dynamic schemas
1 parent 4459243 commit 1953fba

File tree

6 files changed

+309
-70
lines changed

6 files changed

+309
-70
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,44 @@ definitions:
17931793
$parameters:
17941794
type: object
17951795
additionalProperties: true
1796+
1797+
PropertyTypesMap:
1798+
title: Property Types Map
1799+
description: (This component is experimental. Use at your own risk.) Represents a mapping between the current type of a named property and its corresponding target type.
1800+
type: object
1801+
required:
1802+
- property_name
1803+
- property_type_pointer
1804+
- type_mapping
1805+
properties:
1806+
property_name:
1807+
type: string
1808+
property_type_pointer:
1809+
title: Key Path
1810+
description: List of potentially nested fields describing the full path of the property type to extract.
1811+
type: array
1812+
items:
1813+
- type: string
1814+
type_mapping:
1815+
"$ref": "#/definitions/TypesMap"
1816+
ItemsTypeMap:
1817+
title: Items Type Map
1818+
description: (This component is experimental. Use at your own risk.) Represents a mapping between the current type of an array's items and its corresponding target type.
1819+
type: object
1820+
required:
1821+
- items_type_pointer
1822+
- type_mapping
1823+
properties:
1824+
property_name:
1825+
type: string
1826+
items_type_pointer:
1827+
title: Items Type Path
1828+
description: List of potentially nested fields describing the full path of the items type to extract.
1829+
type: array
1830+
items:
1831+
- type: string
1832+
type_mapping:
1833+
"$ref": "#/definitions/TypesMap"
17961834
TypesMap:
17971835
title: Types Map
17981836
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1817,6 +1855,12 @@ definitions:
18171855
type: string
18181856
interpolation_context:
18191857
- raw_schema
1858+
items_type:
1859+
"$ref": "#/definitions/ItemsTypeMap"
1860+
properties_types:
1861+
type: array
1862+
items:
1863+
- "$ref": "#/definitions/PropertyTypesMap"
18201864
SchemaTypeIdentifier:
18211865
title: Schema Type Identifier
18221866
description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 109 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,9 @@ class OAuthAuthenticator(BaseModel):
604604
scopes: Optional[List[str]] = Field(
605605
None,
606606
description="List of scopes that should be granted to the access token.",
607-
examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]],
607+
examples=[
608+
["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]
609+
],
608610
title="Scopes",
609611
)
610612
token_expiry_date: Optional[str] = Field(
@@ -726,33 +728,6 @@ class HttpResponseFilter(BaseModel):
726728
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
727729

728730

729-
class TypesMap(BaseModel):
730-
target_type: Union[str, List[str]]
731-
current_type: Union[str, List[str]]
732-
condition: Optional[str] = None
733-
734-
735-
class SchemaTypeIdentifier(BaseModel):
736-
type: Optional[Literal["SchemaTypeIdentifier"]] = None
737-
schema_pointer: Optional[List[str]] = Field(
738-
[],
739-
description="List of nested fields defining the schema field path to extract. Defaults to [].",
740-
title="Schema Path",
741-
)
742-
key_pointer: List[str] = Field(
743-
...,
744-
description="List of potentially nested fields describing the full path of the field key to extract.",
745-
title="Key Path",
746-
)
747-
type_pointer: Optional[List[str]] = Field(
748-
None,
749-
description="List of potentially nested fields describing the full path of the field type to extract.",
750-
title="Type Path",
751-
)
752-
types_mapping: Optional[List[TypesMap]] = None
753-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
754-
755-
756731
class InlineSchemaLoader(BaseModel):
757732
type: Literal["InlineSchemaLoader"]
758733
schema_: Optional[Dict[str, Any]] = Field(
@@ -1025,24 +1000,28 @@ class OAuthConfigSpecification(BaseModel):
10251000
class Config:
10261001
extra = Extra.allow
10271002

1028-
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field(
1029-
None,
1030-
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
1031-
examples=[
1032-
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
1033-
{
1034-
"app_id": {
1035-
"type": "string",
1036-
"path_in_connector_config": ["info", "app_id"],
1037-
}
1038-
},
1039-
],
1040-
title="OAuth user input",
1003+
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = (
1004+
Field(
1005+
None,
1006+
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
1007+
examples=[
1008+
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
1009+
{
1010+
"app_id": {
1011+
"type": "string",
1012+
"path_in_connector_config": ["info", "app_id"],
1013+
}
1014+
},
1015+
],
1016+
title="OAuth user input",
1017+
)
10411018
)
1042-
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
1043-
None,
1044-
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1045-
title="DeclarativeOAuth Connector Specification",
1019+
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = (
1020+
Field(
1021+
None,
1022+
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1023+
title="DeclarativeOAuth Connector Specification",
1024+
)
10461025
)
10471026
complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
10481027
None,
@@ -1060,7 +1039,9 @@ class Config:
10601039
complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field(
10611040
None,
10621041
description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }",
1063-
examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}],
1042+
examples=[
1043+
{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}
1044+
],
10641045
title="OAuth input specification",
10651046
)
10661047
complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1646,7 +1627,9 @@ class RecordSelector(BaseModel):
16461627
description="Responsible for filtering records to be emitted by the Source.",
16471628
title="Record Filter",
16481629
)
1649-
schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
1630+
schema_normalization: Optional[
1631+
Union[SchemaNormalization, CustomSchemaNormalization]
1632+
] = Field(
16501633
SchemaNormalization.None_,
16511634
description="Responsible for normalization according to the schema.",
16521635
title="Schema Normalization",
@@ -1820,12 +1803,16 @@ class Config:
18201803
description="Component used to coordinate how records are extracted across stream slices and request pages.",
18211804
title="Retriever",
18221805
)
1823-
incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field(
1824-
None,
1825-
description="Component used to fetch data incrementally based on a time field in the data.",
1826-
title="Incremental Sync",
1806+
incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = (
1807+
Field(
1808+
None,
1809+
description="Component used to fetch data incrementally based on a time field in the data.",
1810+
title="Incremental Sync",
1811+
)
1812+
)
1813+
name: Optional[str] = Field(
1814+
"", description="The stream name.", example=["Users"], title="Name"
18271815
)
1828-
name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name")
18291816
primary_key: Optional[PrimaryKey] = Field(
18301817
"", description="The primary key of the stream.", title="Primary Key"
18311818
)
@@ -2010,6 +1997,55 @@ class HttpRequester(BaseModel):
20101997
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
20111998

20121999

2000+
class PropertyTypesMap(BaseModel):
2001+
property_name: str
2002+
property_type_pointer: Optional[List[str]] = Field(
2003+
None,
2004+
description="List of potentially nested fields describing the full path of the property type to extract.",
2005+
title="Key Path",
2006+
)
2007+
type_mapping: TypesMap
2008+
2009+
2010+
class ItemsTypeMap(BaseModel):
2011+
property_name: Optional[str] = None
2012+
items_type_pointer: Optional[List[str]] = Field(
2013+
None,
2014+
description="List of potentially nested fields describing the full path of the items type to extract.",
2015+
title="Items Type Path",
2016+
)
2017+
type_mapping: TypesMap
2018+
2019+
2020+
class TypesMap(BaseModel):
2021+
target_type: Union[str, List[str]]
2022+
current_type: Union[str, List[str]]
2023+
condition: Optional[str] = None
2024+
items_type: Optional[ItemsTypeMap] = None
2025+
properties_types: Optional[List[PropertyTypesMap]] = None
2026+
2027+
2028+
class SchemaTypeIdentifier(BaseModel):
2029+
type: Optional[Literal["SchemaTypeIdentifier"]] = None
2030+
schema_pointer: Optional[List[str]] = Field(
2031+
[],
2032+
description="List of nested fields defining the schema field path to extract. Defaults to [].",
2033+
title="Schema Path",
2034+
)
2035+
key_pointer: List[str] = Field(
2036+
...,
2037+
description="List of potentially nested fields describing the full path of the field key to extract.",
2038+
title="Key Path",
2039+
)
2040+
type_pointer: Optional[List[str]] = Field(
2041+
None,
2042+
description="List of potentially nested fields describing the full path of the field type to extract.",
2043+
title="Type Path",
2044+
)
2045+
types_mapping: Optional[List[TypesMap]] = None
2046+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2047+
2048+
20132049
class DynamicSchemaLoader(BaseModel):
20142050
type: Literal["DynamicSchemaLoader"]
20152051
retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
@@ -2097,7 +2133,11 @@ class SimpleRetriever(BaseModel):
20972133
CustomPartitionRouter,
20982134
ListPartitionRouter,
20992135
SubstreamPartitionRouter,
2100-
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2136+
List[
2137+
Union[
2138+
CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2139+
]
2140+
],
21012141
]
21022142
] = Field(
21032143
[],
@@ -2141,7 +2181,9 @@ class AsyncRetriever(BaseModel):
21412181
)
21422182
download_extractor: Optional[
21432183
Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor]
2144-
] = Field(None, description="Responsible for fetching the records from provided urls.")
2184+
] = Field(
2185+
None, description="Responsible for fetching the records from provided urls."
2186+
)
21452187
creation_requester: Union[CustomRequester, HttpRequester] = Field(
21462188
...,
21472189
description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.",
@@ -2175,7 +2217,11 @@ class AsyncRetriever(BaseModel):
21752217
CustomPartitionRouter,
21762218
ListPartitionRouter,
21772219
SubstreamPartitionRouter,
2178-
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2220+
List[
2221+
Union[
2222+
CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2223+
]
2224+
],
21792225
]
21802226
] = Field(
21812227
[],
@@ -2243,10 +2289,12 @@ class DynamicDeclarativeStream(BaseModel):
22432289
stream_template: DeclarativeStream = Field(
22442290
..., description="Reference to the stream template.", title="Stream Template"
22452291
)
2246-
components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = Field(
2247-
...,
2248-
description="Component resolve and populates stream templates with components values.",
2249-
title="Components Resolver",
2292+
components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = (
2293+
Field(
2294+
...,
2295+
description="Component resolve and populates stream templates with components values.",
2296+
title="Components Resolver",
2297+
)
22502298
)
22512299

22522300

@@ -2256,6 +2304,8 @@ class DynamicDeclarativeStream(BaseModel):
22562304
SelectiveAuthenticator.update_forward_refs()
22572305
DeclarativeStream.update_forward_refs()
22582306
SessionTokenAuthenticator.update_forward_refs()
2307+
PropertyTypesMap.update_forward_refs()
2308+
ItemsTypeMap.update_forward_refs()
22592309
DynamicSchemaLoader.update_forward_refs()
22602310
SimpleRetriever.update_forward_refs()
22612311
AsyncRetriever.update_forward_refs()

0 commit comments

Comments
 (0)