Commit 85262e2

Merge branch 'lazebnyi/add-array-items-handling-to-dynamic-schemas' of github.com:airbytehq/airbyte-python-cdk into lazebnyi/add-array-items-handling-to-dynamic-schemas
2 parents: 0ee84d5 + 736bf28

File tree: 6 files changed, +751 -23 lines

.github/workflows/connector-tests.yml

Lines changed: 53 additions & 2 deletions
@@ -25,7 +25,7 @@ concurrency:
 jobs:
   cdk_changes:
     name: Get Changes
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-22.04
     permissions:
       statuses: write
       pull-requests: read
@@ -62,7 +62,7 @@ jobs:
     # Forked PRs are handled by the community_ci.yml workflow
     # If the condition is not met the job will be skipped (it will not fail)
     # runs-on: connector-test-large
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-22.04
     timeout-minutes: 360 # 6 hours
     strategy:
       fail-fast: false
@@ -96,6 +96,8 @@ jobs:
     name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs['src'] == 'false' || needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})"
     permissions:
       checks: write
+      contents: write # Required for creating commit statuses
+      pull-requests: read
     steps:
       - name: Abort if extra not changed (${{matrix.cdk_extra}})
         id: no_changes
@@ -127,6 +129,22 @@ jobs:
         uses: actions/setup-python@v5
         with:
          python-version: "3.10"
+      # Create initial pending status for test report
+      - name: Create Pending Test Report Status
+        if: steps.no_changes.outputs.status != 'cancelled'
+        env:
+          GH_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
+        run: |
+          HEAD_SHA="${{ github.event.pull_request.head.sha || github.sha }}"
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/statuses/$HEAD_SHA \
+            -f state="pending" \
+            -f description="Running connector tests..." \
+            -f context="${{ matrix.connector }} Test Report"
+
       - name: Test Connector
         if: steps.no_changes.outputs.status != 'cancelled'
         timeout-minutes: 90
@@ -173,6 +191,39 @@ jobs:
           echo "success=${success}" >> $GITHUB_OUTPUT
           echo "html_report_url=${html_report_url}" >> $GITHUB_OUTPUT

+      # Update the test report status with results
+      - name: Update Test Report Status
+        if: always() && steps.no_changes.outputs.status != 'cancelled' && steps.evaluate_output.outcome == 'success'
+        env:
+          GH_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
+        run: |
+          HEAD_SHA="${{ github.event.pull_request.head.sha || github.sha }}"
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/statuses/$HEAD_SHA \
+            -f state="${{ steps.evaluate_output.outputs.success == 'true' && 'success' || 'failure' }}" \
+            -f target_url="${{ steps.evaluate_output.outputs.html_report_url }}" \
+            -f description="Click Details to view the test report" \
+            -f context="${{ matrix.connector }} Test Report"
+
+      # Create failure status if report generation failed
+      - name: Create Report Generation Failed Status
+        if: always() && steps.no_changes.outputs.status != 'cancelled' && steps.evaluate_output.outcome != 'success'
+        env:
+          GH_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
+        run: |
+          HEAD_SHA="${{ github.event.pull_request.head.sha || github.sha }}"
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/statuses/$HEAD_SHA \
+            -f state="failure" \
+            -f description="Failed to run connector tests." \
+            -f context="${{ matrix.connector }} Test Report"
+
       # Upload the job output to the artifacts
       - name: Upload Job Output
         id: upload_job_output
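
All three status steps above drive the same GitHub commit statuses endpoint (POST /repos/{owner}/{repo}/statuses/{sha}): the report is first marked "pending", then resolved to "success" or "failure" once the tests finish. The sketch below is not part of this commit; it shows an equivalent call in Python using the requests library, with the repository, SHA, connector context, and GH_TOKEN environment variable all as placeholder values.

import os

import requests  # assumed available; install with `pip install requests`


def set_commit_status(
    repo: str,
    sha: str,
    state: str,
    context: str,
    description: str,
    target_url: str = "",
) -> None:
    """POST a commit status; state is one of pending/success/failure/error."""
    payload = {"state": state, "context": context, "description": description}
    if target_url:
        payload["target_url"] = target_url  # rendered as the "Details" link
    response = requests.post(
        f"https://api.github.com/repos/{repo}/statuses/{sha}",
        json=payload,
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {os.environ['GH_TOKEN']}",
            "X-GitHub-Api-Version": "2022-11-28",
        },
        timeout=30,
    )
    response.raise_for_status()


# Lifecycle mirrored from the workflow: "pending" before the tests run, then
# "success" or "failure" with a link to the HTML report once they finish.
set_commit_status(
    "airbytehq/airbyte-python-cdk",  # placeholder repository
    "85262e2",                       # placeholder commit SHA
    "pending",
    "source-pokeapi Test Report",    # placeholder connector context
    "Running connector tests...",
)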

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py

Lines changed: 35 additions & 18 deletions
@@ -222,6 +222,8 @@ def get_request_params(
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_params(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -244,6 +246,8 @@ def get_request_headers(
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_headers(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -266,6 +270,8 @@ def get_request_body_data(
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Union[Mapping[str, Any], str]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_body_data(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -288,6 +294,8 @@ def get_request_body_json(
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_body_json(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -303,21 +311,6 @@ def get_request_body_json(
             raise ValueError("A partition needs to be provided in order to get request body json")

     def should_be_synced(self, record: Record) -> bool:
-        if (
-            record.associated_slice
-            and self._to_partition_key(record.associated_slice.partition)
-            not in self._cursor_per_partition
-        ):
-            partition_state = (
-                self._state_to_migrate_from
-                if self._state_to_migrate_from
-                else self._NO_CURSOR_STATE
-            )
-            cursor = self._create_cursor(partition_state)
-
-            self._cursor_per_partition[
-                self._to_partition_key(record.associated_slice.partition)
-            ] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )
@@ -356,8 +349,32 @@ def _get_cursor(self, record: Record) -> DeclarativeCursor:
             )
         partition_key = self._to_partition_key(record.associated_slice.partition)
         if partition_key not in self._cursor_per_partition:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
+            self._create_cursor_for_partition(partition_key)
         cursor = self._cursor_per_partition[partition_key]
         return cursor
+
+    def _create_cursor_for_partition(self, partition_key: str) -> None:
+        """
+        Dynamically creates and initializes a cursor for the specified partition.
+
+        This method is required for `ConcurrentPerPartitionCursor`. For concurrent cursors,
+        stream_slices is executed only for the concurrent cursor, so cursors per partition
+        are not created for the declarative cursor. This method ensures that a cursor is available
+        to create requests for the specified partition. The cursor is initialized
+        with the per-partition state if present in the initial state, or with the global state
+        adjusted by the lookback window, or with the state to migrate from.
+
+        Note:
+            This is a temporary workaround and should be removed once the declarative cursor
+            is decoupled from the concurrent cursor implementation.
+
+        Args:
+            partition_key (str): The unique identifier for the partition for which the cursor
+                needs to be created.
+        """
+        partition_state = (
+            self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
+        )
+        cursor = self._create_cursor(partition_state)
+
+        self._cursor_per_partition[partition_key] = cursor
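
Taken together, the changes to this file replace should_be_synced's inline fallback and _get_cursor's ValueError with a single lazy create-on-first-use path. Below is a minimal standalone sketch of that pattern; the class and method names (LazyPartitionCursors, get) are invented for illustration and are not the CDK's actual API.

from typing import Any, Dict, Mapping, Optional


class LazyPartitionCursors:
    """Toy cache that creates a per-partition cursor state on first access."""

    def __init__(self, state_to_migrate_from: Optional[Mapping[str, Any]] = None) -> None:
        self._cursor_per_partition: Dict[str, Dict[str, Any]] = {}
        self._state_to_migrate_from = state_to_migrate_from

    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
        # Stable string key derived from the partition mapping.
        return repr(sorted(partition.items()))

    def get(self, partition: Mapping[str, Any]) -> Dict[str, Any]:
        key = self._to_partition_key(partition)
        if key not in self._cursor_per_partition:
            # Same fallback order as the diff: use the state to migrate from
            # if present, otherwise an empty "no cursor" state.
            self._cursor_per_partition[key] = dict(self._state_to_migrate_from or {})
        return self._cursor_per_partition[key]


cursors = LazyPartitionCursors(state_to_migrate_from={"updated_at": "2024-01-01"})
print(cursors.get({"parent_id": "42"}))  # {'updated_at': '2024-01-01'}, created lazily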

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 1 addition & 1 deletion
@@ -2407,7 +2407,7 @@ def create_simple_retriever(
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
+        ):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
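
The surviving condition pairs isinstance() with an exact type() check: only a slicer whose type is exactly DatetimeBasedCursor skips the branch, while subclasses still enter it, and with this change PerPartitionWithGlobalCursor now enters it too. A standalone illustration of that isinstance/type distinction, using stub classes rather than the real CDK types:

# Stub classes for illustration only; not the CDK's real types.
class DatetimeBasedCursor: ...


class CustomDatetimeCursor(DatetimeBasedCursor): ...


for stream_slicer in (DatetimeBasedCursor(), CustomDatetimeCursor(), object()):
    enters_branch = (
        not isinstance(stream_slicer, DatetimeBasedCursor)
        or type(stream_slicer) is not DatetimeBasedCursor
    )
    # Only the exact DatetimeBasedCursor instance skips the branch.
    print(type(stream_slicer).__name__, enters_branch)
# DatetimeBasedCursor False
# CustomDatetimeCursor True
# object True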

airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py

Lines changed: 6 additions & 2 deletions
@@ -296,8 +296,12 @@ def set_initial_state(self, stream_state: StreamState) -> None:

         if not parent_state and incremental_dependency:
             # Attempt to retrieve child state
-            substream_state = list(stream_state.values())
-            substream_state = substream_state[0] if substream_state else {}  # type: ignore [assignment] # Incorrect type for assignment
+            substream_state_values = list(stream_state.values())
+            substream_state = substream_state_values[0] if substream_state_values else {}
+            # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
+            if isinstance(substream_state, (list, dict)):
+                substream_state = {}
+
             parent_state = {}

         # Copy child state to parent streams with incremental dependencies
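
The added guard is easy to reproduce in isolation: when the first child state value is itself a list or dict, it is per-partition state with no single cursor value to forward to a parent as {cursor_field: value}. A small sketch using a hypothetical helper name (extract_child_cursor_value is not a CDK function):

from typing import Any, Dict


def extract_child_cursor_value(stream_state: Dict[str, Any]) -> Any:
    """Return the first child state value, or {} when it is per-partition state."""
    values = list(stream_state.values())
    state: Any = values[0] if values else {}
    # A list or dict here means per-partition state (e.g. {"states": [...]}),
    # which has no single cursor value to hand to the parent stream.
    if isinstance(state, (list, dict)):
        state = {}
    return state


print(extract_child_cursor_value({"updated_at": "2024-01-01"}))     # '2024-01-01'
print(extract_child_cursor_value({"states": [{"partition": {}}]}))  # {}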
