Skip to content

Commit 11382f9

Browse files
committed
Add full_refresh_ignore_min_max_datetime flag
1 parent 4ef852e commit 11382f9

File tree

4 files changed

+94
-42
lines changed

4 files changed

+94
-42
lines changed

airbyte_cdk/sources/declarative/concurrent_declarative_source.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,17 @@ def _group_streams(
228228
incremental_sync_component_definition["start_time_option"] = None
229229
incremental_sync_component_definition["end_time_option"] = None
230230

231+
if (
232+
name_to_stream_mapping[declarative_stream.name]
233+
.get("retriever", {})
234+
.get("full_refresh_ignore_min_max_datetime", False)
235+
and incremental_sync_component_definition
236+
):
237+
incremental_sync_component_definition["start_datetime"]["max_datetime"] = None
238+
incremental_sync_component_definition["start_datetime"]["min_datetime"] = None
239+
incremental_sync_component_definition["end_datetime"]["max_datetime"] = None
240+
incremental_sync_component_definition["end_datetime"]["min_datetime"] = None
241+
231242
partition_router_component_definition = (
232243
name_to_stream_mapping[declarative_stream.name]
233244
.get("retriever", {})

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,6 +3166,10 @@ definitions:
31663166
description: If set to true, a single slice will be used and its request options will be ignored when sending requests.
31673167
type: boolean
31683168
default: false
3169+
full_refresh_ignore_min_max_datetime:
3170+
description: If set to true, the min and max limits on the start and end datetimes are ignored for the full refresh retriever.
3171+
type: boolean
3172+
default: false
31693173
full_refresh_retriever:
31703174
title: Retriever
31713175
description: Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 74 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,9 @@ class OAuthAuthenticator(BaseModel):
609609
scopes: Optional[List[str]] = Field(
610610
None,
611611
description="List of scopes that should be granted to the access token.",
612-
examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]],
612+
examples=[
613+
["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]
614+
],
613615
title="Scopes",
614616
)
615617
token_expiry_date: Optional[str] = Field(
@@ -1078,24 +1080,28 @@ class OAuthConfigSpecification(BaseModel):
10781080
class Config:
10791081
extra = Extra.allow
10801082

1081-
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field(
1082-
None,
1083-
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
1084-
examples=[
1085-
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
1086-
{
1087-
"app_id": {
1088-
"type": "string",
1089-
"path_in_connector_config": ["info", "app_id"],
1090-
}
1091-
},
1092-
],
1093-
title="OAuth user input",
1083+
oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = (
1084+
Field(
1085+
None,
1086+
description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }",
1087+
examples=[
1088+
{"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}},
1089+
{
1090+
"app_id": {
1091+
"type": "string",
1092+
"path_in_connector_config": ["info", "app_id"],
1093+
}
1094+
},
1095+
],
1096+
title="OAuth user input",
1097+
)
10941098
)
1095-
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
1096-
None,
1097-
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1098-
title="DeclarativeOAuth Connector Specification",
1099+
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = (
1100+
Field(
1101+
None,
1102+
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1103+
title="DeclarativeOAuth Connector Specification",
1104+
)
10991105
)
11001106
complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
11011107
None,
@@ -1113,7 +1119,9 @@ class Config:
11131119
complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field(
11141120
None,
11151121
description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }",
1116-
examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}],
1122+
examples=[
1123+
{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}
1124+
],
11171125
title="OAuth input specification",
11181126
)
11191127
complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1766,7 +1774,9 @@ class RecordSelector(BaseModel):
17661774
description="Responsible for filtering records to be emitted by the Source.",
17671775
title="Record Filter",
17681776
)
1769-
schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field(
1777+
schema_normalization: Optional[
1778+
Union[SchemaNormalization, CustomSchemaNormalization]
1779+
] = Field(
17701780
SchemaNormalization.None_,
17711781
description="Responsible for normalization according to the schema.",
17721782
title="Schema Normalization",
@@ -1965,12 +1975,12 @@ class Config:
19651975
extra = Extra.allow
19661976

19671977
type: Literal["DeclarativeStream"]
1968-
retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever, StateDelegatingRetriever] = (
1969-
Field(
1970-
...,
1971-
description="Component used to coordinate how records are extracted across stream slices and request pages.",
1972-
title="Retriever",
1973-
)
1978+
retriever: Union[
1979+
AsyncRetriever, CustomRetriever, SimpleRetriever, StateDelegatingRetriever
1980+
] = Field(
1981+
...,
1982+
description="Component used to coordinate how records are extracted across stream slices and request pages.",
1983+
title="Retriever",
19741984
)
19751985
incremental_sync: Optional[
19761986
Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
@@ -1979,7 +1989,9 @@ class Config:
19791989
description="Component used to fetch data incrementally based on a time field in the data.",
19801990
title="Incremental Sync",
19811991
)
1982-
name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name")
1992+
name: Optional[str] = Field(
1993+
"", description="The stream name.", example=["Users"], title="Name"
1994+
)
19831995
primary_key: Optional[PrimaryKey] = Field(
19841996
"", description="The primary key of the stream.", title="Primary Key"
19851997
)
@@ -2236,15 +2248,23 @@ class StateDelegatingRetriever(BaseModel):
22362248
False,
22372249
description="If set to true, a single slice will be used and its request options will be ignored when sending requests.",
22382250
)
2239-
full_refresh_retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
2240-
...,
2241-
description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
2242-
title="Retriever",
2251+
full_refresh_ignore_min_max_datetime: Optional[bool] = Field(
2252+
False,
2253+
description="If set to true, a min and max limitation for start and end datetime will be ignored for full refresh retriever.",
22432254
)
2244-
incremental_retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
2245-
...,
2246-
description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.",
2247-
title="Retriever",
2255+
full_refresh_retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = (
2256+
Field(
2257+
...,
2258+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
2259+
title="Retriever",
2260+
)
2261+
)
2262+
incremental_retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = (
2263+
Field(
2264+
...,
2265+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.",
2266+
title="Retriever",
2267+
)
22482268
)
22492269
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
22502270

@@ -2272,7 +2292,11 @@ class SimpleRetriever(BaseModel):
22722292
CustomPartitionRouter,
22732293
ListPartitionRouter,
22742294
SubstreamPartitionRouter,
2275-
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2295+
List[
2296+
Union[
2297+
CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2298+
]
2299+
],
22762300
]
22772301
] = Field(
22782302
[],
@@ -2316,7 +2340,9 @@ class AsyncRetriever(BaseModel):
23162340
)
23172341
download_extractor: Optional[
23182342
Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor]
2319-
] = Field(None, description="Responsible for fetching the records from provided urls.")
2343+
] = Field(
2344+
None, description="Responsible for fetching the records from provided urls."
2345+
)
23202346
creation_requester: Union[CustomRequester, HttpRequester] = Field(
23212347
...,
23222348
description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.",
@@ -2350,7 +2376,11 @@ class AsyncRetriever(BaseModel):
23502376
CustomPartitionRouter,
23512377
ListPartitionRouter,
23522378
SubstreamPartitionRouter,
2353-
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2379+
List[
2380+
Union[
2381+
CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2382+
]
2383+
],
23542384
]
23552385
] = Field(
23562386
[],
@@ -2418,10 +2448,12 @@ class DynamicDeclarativeStream(BaseModel):
24182448
stream_template: DeclarativeStream = Field(
24192449
..., description="Reference to the stream template.", title="Stream Template"
24202450
)
2421-
components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = Field(
2422-
...,
2423-
description="Component resolve and populates stream templates with components values.",
2424-
title="Components Resolver",
2451+
components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = (
2452+
Field(
2453+
...,
2454+
description="Component resolve and populates stream templates with components values.",
2455+
title="Components Resolver",
2456+
)
24252457
)
24262458

24272459

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1820,6 +1820,11 @@ def _build_incremental_cursor(
18201820
model.incremental_sync.cursor_granularity = None
18211821
model.incremental_sync.start_time_option = None
18221822
model.incremental_sync.end_time_option = None
1823+
elif model.retriever.full_refresh_ignore_min_max_datetime:
1824+
model.incremental_sync.start_datetime.max_datetime = None
1825+
model.incremental_sync.start_datetime.min_datetime = None
1826+
model.incremental_sync.end_datetime.max_datetime = None
1827+
model.incremental_sync.end_datetime.min_datetime = None
18231828

18241829
if model.incremental_sync and stream_slicer:
18251830
if model.retriever.type == "AsyncRetriever":

0 commit comments

Comments
 (0)