Skip to content

Commit 2b4684a

Browse files
authored
Merge branch 'main' into devin/1754519222-add-json-schema-generation
2 parents 8b9171c + 784bdb3 commit 2b4684a

File tree

105 files changed

+7678
-1091
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+7678
-1091
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: 'Check Docker Tag Exists'
2+
description: 'Check if a Docker tag exists on DockerHub to prevent overwrites'
3+
inputs:
4+
image_name:
5+
description: 'Docker image name (e.g. airbyte/source-declarative-manifest)'
6+
required: true
7+
tag:
8+
description: 'Docker tag to check'
9+
required: true
10+
runs:
11+
using: "composite"
12+
steps:
13+
- name: "Check for existing tag (${{ inputs.image_name }}:${{ inputs.tag }})"
14+
shell: bash
15+
run: |
16+
image="${{ inputs.image_name }}"
17+
tag_input="${{ inputs.tag }}"
18+
if [ -z "$image" ] || [ -z "$tag_input" ]; then
19+
echo "Error: image_name and tag are required."
20+
exit 1
21+
fi
22+
tag="${image}:${tag_input}"
23+
echo "Checking if tag '$tag' exists on DockerHub..."
24+
if DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "$tag" > /dev/null 2>&1; then
25+
echo "The tag '$tag' already exists on DockerHub. Skipping publish to prevent overwrite."
26+
exit 1
27+
fi
28+
echo "No existing tag '$tag' found. Proceeding with publish."

.github/workflows/docker-build-check.yml

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
name: Docker Build Check
2+
permissions:
3+
contents: read
24

35
on:
46
pull_request:
57
branches:
68
- main
79

810
jobs:
9-
docker-build-check:
10-
name: SDM Docker Image Build # Renamed job to be more descriptive
11+
sdm-docker-build-check:
12+
name: SDM Docker Image Build
1113
runs-on: ubuntu-24.04
1214
steps:
1315
- name: Checkout code
@@ -42,3 +44,29 @@ jobs:
4244
push: false
4345
tags: airbyte/source-declarative-manifest:pr-${{ github.event.pull_request.number }}
4446
outputs: type=image,name=target,annotation-index.org.opencontainers.image.description=SDM Docker image for PR ${{ github.event.pull_request.number }}
47+
48+
manifest-server-docker-build-check:
49+
name: Manifest Server Docker Image Build
50+
runs-on: ubuntu-24.04
51+
steps:
52+
- name: Checkout code
53+
uses: actions/checkout@v4
54+
with:
55+
fetch-depth: 0
56+
57+
- name: Set up QEMU for multi-platform builds
58+
uses: docker/setup-qemu-action@v3
59+
60+
- name: Set up Docker Buildx
61+
uses: docker/setup-buildx-action@v3
62+
63+
- name: Build Manifest Server Docker image for multiple platforms
64+
id: manifest-server-build
65+
uses: docker/build-push-action@v5
66+
with:
67+
context: .
68+
file: airbyte_cdk/manifest_server/Dockerfile
69+
platforms: linux/amd64,linux/arm64
70+
push: false
71+
tags: airbyte/manifest-server:pr-${{ github.event.pull_request.number }}
72+
outputs: type=image,name=target,annotation-index.org.opencontainers.image.description=Manifest Server Docker image for PR ${{ github.event.pull_request.number }}
Lines changed: 95 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# we have to also update the Trusted Publisher settings on PyPI.
77

88
name: CDK Publish
9+
permissions:
10+
contents: read
911

1012
on:
1113
push:
@@ -31,6 +33,11 @@ on:
3133
type: boolean
3234
required: true
3335
default: true
36+
publish_manifest_server:
37+
description: "Publish Manifest Server to DockerHub. If true, the workflow will publish the Manifest Server to DockerHub."
38+
type: boolean
39+
required: true
40+
default: true
3441
update_connector_builder:
3542
description: "Update Connector Builder. If true, the workflow will create a PR to bump the CDK version used by Connector Builder."
3643
type: boolean
@@ -204,18 +211,10 @@ jobs:
204211

205212
- name: "Check for existing tag (version: ${{ env.VERSION || 'none' }} )"
206213
if: env.VERSION != ''
207-
run: |
208-
tag="airbyte/source-declarative-manifest:${{ env.VERSION }}"
209-
if [ -z "$tag" ]; then
210-
echo "Error: VERSION is not set. Ensure the tag follows the format 'refs/tags/vX.Y.Z'."
211-
exit 1
212-
fi
213-
echo "Checking if tag '$tag' exists on DockerHub..."
214-
if DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "$tag" > /dev/null 2>&1; then
215-
echo "The tag '$tag' already exists on DockerHub. Skipping publish to prevent overwrite."
216-
exit 1
217-
fi
218-
echo "No existing tag '$tag' found. Proceeding with publish."
214+
uses: ./.github/actions/check-docker-tag
215+
with:
216+
image_name: airbyte/source-declarative-manifest
217+
tag: ${{ env.VERSION }}
219218

220219
- name: "Build and push (sha tag: '${{ github.sha }}')"
221220
# Only run if the version is not set
@@ -250,6 +249,90 @@ jobs:
250249
tags: |
251250
airbyte/source-declarative-manifest:latest
252251
252+
publish_manifest_server:
253+
name: Publish Manifest Server to DockerHub
254+
if: >
255+
(github.event_name == 'push' &&
256+
startsWith(github.ref, 'refs/tags/v')) ||
257+
(github.event_name == 'workflow_dispatch' &&
258+
github.event.inputs.publish_manifest_server == 'true'
259+
)
260+
runs-on: ubuntu-24.04
261+
needs: [build]
262+
environment:
263+
name: DockerHub
264+
url: https://hub.docker.com/r/airbyte/manifest-server/tags
265+
env:
266+
VERSION: ${{ needs.build.outputs.VERSION }}
267+
IS_PRERELEASE: ${{ needs.build.outputs.IS_PRERELEASE }}
268+
269+
steps:
270+
- uses: actions/checkout@v4
271+
with:
272+
fetch-depth: 0
273+
274+
# We need to download the build artifact again because the previous job was on a different runner
275+
- name: Download Build Artifact
276+
uses: actions/download-artifact@v4
277+
with:
278+
name: Packages-${{ github.run_id }}
279+
path: dist
280+
281+
- name: Set up QEMU for multi-platform builds
282+
uses: docker/setup-qemu-action@v3
283+
284+
- name: Set up Docker Buildx
285+
uses: docker/setup-buildx-action@v3
286+
287+
- name: Login to Docker Hub
288+
uses: docker/login-action@v3
289+
with:
290+
username: ${{ secrets.DOCKER_HUB_USERNAME }}
291+
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
292+
293+
- name: "Check for existing tag (version: ${{ env.VERSION || 'none' }} )"
294+
if: env.VERSION != ''
295+
uses: ./.github/actions/check-docker-tag
296+
with:
297+
image_name: airbyte/manifest-server
298+
tag: ${{ env.VERSION }}
299+
300+
- name: "Build and push (sha tag: '${{ github.sha }}')"
301+
# Only run if the version is not set
302+
if: env.VERSION == ''
303+
uses: docker/build-push-action@v5
304+
with:
305+
context: .
306+
file: airbyte_cdk/manifest_server/Dockerfile
307+
platforms: linux/amd64,linux/arm64
308+
push: true
309+
tags: |
310+
airbyte/manifest-server:${{ github.sha }}
311+
312+
- name: "Build and push (version tag: ${{ env.VERSION || 'none'}})"
313+
# Only run if the version is set
314+
if: env.VERSION != ''
315+
uses: docker/build-push-action@v5
316+
with:
317+
context: .
318+
file: airbyte_cdk/manifest_server/Dockerfile
319+
platforms: linux/amd64,linux/arm64
320+
push: true
321+
tags: |
322+
airbyte/manifest-server:${{ env.VERSION }}
323+
324+
- name: Build and push ('latest' tag)
325+
# Only run if version is set and IS_PRERELEASE is false
326+
if: env.VERSION != '' && env.IS_PRERELEASE == 'false'
327+
uses: docker/build-push-action@v5
328+
with:
329+
context: .
330+
file: airbyte_cdk/manifest_server/Dockerfile
331+
platforms: linux/amd64,linux/arm64
332+
push: true
333+
tags: |
334+
airbyte/manifest-server:latest
335+
253336
update-connector-builder:
254337
# Create a PR against the Builder, to update the CDK version that it uses.
255338
# In the future, Builder may use the SDM docker image instead of the Python CDK package.

airbyte_cdk/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@
107107
from .sources.declarative.extractors.record_filter import RecordFilter
108108
from .sources.declarative.incremental import DatetimeBasedCursor
109109
from .sources.declarative.interpolation import InterpolatedBoolean, InterpolatedString
110-
from .sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
111110
from .sources.declarative.migrations.legacy_to_per_partition_state_migration import (
112111
LegacyToPerPartitionStateMigration,
113112
)
@@ -253,7 +252,6 @@
253252
"JsonDecoder",
254253
"JsonFileSchemaLoader",
255254
"LegacyToPerPartitionStateMigration",
256-
"ManifestDeclarativeSource",
257255
"MinMaxDatetime",
258256
"NoAuth",
259257
"OffsetIncrement",

airbyte_cdk/connector_builder/connector_builder_handler.py

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#
44

55

6-
from dataclasses import asdict, dataclass, field
7-
from typing import Any, ClassVar, Dict, List, Mapping
6+
from dataclasses import asdict
7+
from typing import Any, Dict, List, Mapping, Optional
88

99
from airbyte_cdk.connector_builder.test_reader import TestReader
1010
from airbyte_cdk.models import (
@@ -15,45 +15,30 @@
1515
Type,
1616
)
1717
from airbyte_cdk.models import Type as MessageType
18-
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
19-
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
20-
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
21-
ModelToComponentFactory,
18+
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
19+
ConcurrentDeclarativeSource,
20+
TestLimits,
2221
)
2322
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
2423
from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
2524
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
2625

27-
DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
28-
DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
29-
DEFAULT_MAXIMUM_RECORDS = 100
30-
DEFAULT_MAXIMUM_STREAMS = 100
31-
3226
MAX_PAGES_PER_SLICE_KEY = "max_pages_per_slice"
3327
MAX_SLICES_KEY = "max_slices"
3428
MAX_RECORDS_KEY = "max_records"
3529
MAX_STREAMS_KEY = "max_streams"
3630

3731

38-
@dataclass
39-
class TestLimits:
40-
__test__: ClassVar[bool] = False # Tell Pytest this is not a Pytest class, despite its name
41-
42-
max_records: int = field(default=DEFAULT_MAXIMUM_RECORDS)
43-
max_pages_per_slice: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE)
44-
max_slices: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_SLICES)
45-
max_streams: int = field(default=DEFAULT_MAXIMUM_STREAMS)
46-
47-
4832
def get_limits(config: Mapping[str, Any]) -> TestLimits:
4933
command_config = config.get("__test_read_config", {})
50-
max_pages_per_slice = (
51-
command_config.get(MAX_PAGES_PER_SLICE_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE
34+
return TestLimits(
35+
max_records=command_config.get(MAX_RECORDS_KEY, TestLimits.DEFAULT_MAX_RECORDS),
36+
max_pages_per_slice=command_config.get(
37+
MAX_PAGES_PER_SLICE_KEY, TestLimits.DEFAULT_MAX_PAGES_PER_SLICE
38+
),
39+
max_slices=command_config.get(MAX_SLICES_KEY, TestLimits.DEFAULT_MAX_SLICES),
40+
max_streams=command_config.get(MAX_STREAMS_KEY, TestLimits.DEFAULT_MAX_STREAMS),
5241
)
53-
max_slices = command_config.get(MAX_SLICES_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_SLICES
54-
max_records = command_config.get(MAX_RECORDS_KEY) or DEFAULT_MAXIMUM_RECORDS
55-
max_streams = command_config.get(MAX_STREAMS_KEY) or DEFAULT_MAXIMUM_STREAMS
56-
return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams)
5742

5843

5944
def should_migrate_manifest(config: Mapping[str, Any]) -> bool:
@@ -75,26 +60,35 @@ def should_normalize_manifest(config: Mapping[str, Any]) -> bool:
7560
return config.get("__should_normalize", False)
7661

7762

78-
def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource:
63+
def create_source(
64+
config: Mapping[str, Any],
65+
limits: TestLimits,
66+
catalog: Optional[ConfiguredAirbyteCatalog],
67+
state: Optional[List[AirbyteStateMessage]],
68+
) -> ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]]:
7969
manifest = config["__injected_declarative_manifest"]
80-
return ManifestDeclarativeSource(
70+
71+
# We enforce a concurrency level of 1 so that the stream is processed on a single thread
72+
# to retain ordering for the grouping of the builder message responses.
73+
if "concurrency_level" in manifest:
74+
manifest["concurrency_level"]["default_concurrency"] = 1
75+
else:
76+
manifest["concurrency_level"] = {"type": "ConcurrencyLevel", "default_concurrency": 1}
77+
78+
return ConcurrentDeclarativeSource(
79+
catalog=catalog,
8180
config=config,
82-
emit_connector_builder_messages=True,
81+
state=state,
8382
source_config=manifest,
83+
emit_connector_builder_messages=True,
8484
migrate_manifest=should_migrate_manifest(config),
8585
normalize_manifest=should_normalize_manifest(config),
86-
component_factory=ModelToComponentFactory(
87-
emit_connector_builder_messages=True,
88-
limit_pages_fetched_per_slice=limits.max_pages_per_slice,
89-
limit_slices_fetched=limits.max_slices,
90-
disable_retries=True,
91-
disable_cache=True,
92-
),
86+
limits=limits,
9387
)
9488

9589

9690
def read_stream(
97-
source: DeclarativeSource,
91+
source: ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]],
9892
config: Mapping[str, Any],
9993
configured_catalog: ConfiguredAirbyteCatalog,
10094
state: List[AirbyteStateMessage],
@@ -132,7 +126,9 @@ def read_stream(
132126
return error.as_airbyte_message()
133127

134128

135-
def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage:
129+
def resolve_manifest(
130+
source: ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]],
131+
) -> AirbyteMessage:
136132
try:
137133
return AirbyteMessage(
138134
type=Type.RECORD,
@@ -149,7 +145,9 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage:
149145
return error.as_airbyte_message()
150146

151147

152-
def full_resolve_manifest(source: ManifestDeclarativeSource, limits: TestLimits) -> AirbyteMessage:
148+
def full_resolve_manifest(
149+
source: ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]], limits: TestLimits
150+
) -> AirbyteMessage:
153151
try:
154152
manifest = {**source.resolved_manifest}
155153
streams = manifest.get("streams", [])

0 commit comments

Comments
 (0)