Skip to content

Commit 1294b43

Browse files
ChristoGrabaaronsteersoctavia-squidington-iiialafanecherecoderabbitai[bot]
authored
Feat: Incorporate SDM in CDK and add publish workflow (#58)
Co-authored-by: Aaron Steers <[email protected]> Co-authored-by: octavia-squidington-iii <[email protected]> Co-authored-by: Augustin <[email protected]> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent 72117aa commit 1294b43

File tree

9 files changed

+377
-3
lines changed

9 files changed

+377
-3
lines changed

.github/workflows/cdk-publish.yml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
name: Publish CDK and Source Declarative Manifest
2+
on:
3+
push:
4+
paths:
5+
- 'airbyte_cdk/pyproject.toml' # To only publish on CDK version change
6+
- 'Dockerfile'
7+
workflow_dispatch:
8+
9+
jobs:
10+
test:
11+
name: Test Source Declarative Manifest Docker Build
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: actions/checkout@v4
15+
with:
16+
fetch-depth: 0
17+
18+
- name: Set up QEMU for multi-platform builds
19+
uses: docker/setup-qemu-action@v3
20+
21+
- name: Set up Docker Buildx
22+
uses: docker/setup-buildx-action@v3
23+
24+
- name: Build test image
25+
uses: docker/build-push-action@v5
26+
with:
27+
context: .
28+
platforms: linux/amd64 # Just build for the runner's architecture during test
29+
load: true
30+
tags: airbyte/source-declarative-manifest:build-test
31+
32+
- name: Test image
33+
run: |
34+
docker run airbyte/source-declarative-manifest:build-test spec
35+
36+
- name: Scan for vulnerabilities
37+
uses: aquasecurity/trivy-action@master
38+
continue-on-error: true # Prevent security scan from failing the build
39+
with:
40+
image-ref: airbyte/source-declarative-manifest:build-test
41+
format: 'table,sarif'
42+
output: 'trivy-results.sarif'
43+
exit-code: 1
44+
severity: 'CRITICAL,HIGH'
45+
timeout: '5m'
46+
47+
publish:
48+
name: Publish SDM Docker Image
49+
runs-on: ubuntu-latest
50+
needs: test
51+
if: ${{ success() && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch') }}
52+
permissions:
53+
id-token: write # Required for trusted publishing
54+
contents: write # Required for artifact uploads
55+
steps:
56+
- uses: actions/checkout@v4
57+
with:
58+
fetch-depth: 0
59+
60+
- name: Set up QEMU for multi-platform builds
61+
uses: docker/setup-qemu-action@v3
62+
63+
- name: Set up Docker Buildx
64+
uses: docker/setup-buildx-action@v3
65+
66+
- name: Login to Docker Hub
67+
uses: docker/login-action@v3
68+
with:
69+
username: ${{ secrets.DOCKER_HUB_USERNAME }}
70+
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
71+
72+
- name: Get CDK version
73+
run: |
74+
cdk_version="$(poetry version --short | tr -d '[:space:]')"
75+
echo "CDK_VERSION=$cdk_version" >> $GITHUB_ENV
76+
77+
- name: Check if tag already exists
78+
run: |
79+
tag="airbyte/source-declarative-manifest:${{ env.CDK_VERSION}}-${{ github.run_number }}"
80+
if DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "$tag" > /dev/null 2>&1; then
81+
echo "The tag $tag already exists on Dockerhub. Skipping publish to prevent overwrite."
82+
exit 1
83+
fi
84+
- name: Build and push
85+
uses: docker/build-push-action@v5
86+
with:
87+
context: .
88+
platforms: linux/amd64,linux/arm64
89+
push: true
90+
tags: |
91+
airbyte/source-declarative-manifest:latest
92+
airbyte/source-declarative-manifest:${{ env.CDK_VERSION }}
93+
airbyte/source-declarative-manifest:${{ env.CDK_VERSION }}-${{ github.run_number }}

.github/workflows/connector-tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ jobs:
8383
cdk_extra: vector-db-based
8484
- connector: destination-motherduck
8585
cdk_extra: sql
86+
# TODO: These are manifest connectors and won't work as expected until we
87+
# add `--use-local-cdk` support for manifest connectors.
88+
- connector: source-the-guardian-api
89+
cdk_extra: n/a
90+
- connector: source-pokeapi
91+
cdk_extra: n/a
8692

8793
name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})"
8894
steps:

Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
FROM docker.io/airbyte/python-connector-base:2.0.0@sha256:c44839ba84406116e8ba68722a0f30e8f6e7056c726f447681bb9e9ece8bd916
2+
3+
WORKDIR /airbyte/integration_code
4+
5+
# Copy project files needed for build
6+
COPY pyproject.toml poetry.lock README.md ./
7+
8+
# Install dependencies - ignore keyring warnings
9+
RUN poetry config virtualenvs.create false \
10+
&& poetry install --only main --no-interaction --no-ansi || true
11+
12+
# Copy source code
13+
COPY airbyte_cdk ./airbyte_cdk
14+
15+
# Build and install the package
16+
RUN poetry build && pip install dist/*.whl
17+
18+
ENTRYPOINT ["poetry", "run", "source-declarative-manifest"]

airbyte_cdk/cli/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from airbyte_cdk.cli.source_declarative_manifest._run import run
2+
3+
4+
__all__ = [
5+
"run",
6+
]
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
"""Defines the `source-declarative-manifest` connector, which installs alongside CDK.
3+
4+
This file was originally imported from the dedicated connector directory, under the
5+
`airbyte` monorepo.
6+
7+
Usage:
8+
9+
```
10+
pipx install airbyte-cdk
11+
source-declarative-manifest --help
12+
source-declarative-manifest spec
13+
...
14+
```
15+
"""
16+
17+
from __future__ import annotations
18+
19+
import json
20+
import pkgutil
21+
import sys
22+
import traceback
23+
from collections.abc import Mapping
24+
from datetime import datetime
25+
from pathlib import Path
26+
from typing import Any, cast
27+
28+
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
29+
from airbyte_cdk.models import (
30+
AirbyteErrorTraceMessage,
31+
AirbyteMessage,
32+
AirbyteMessageSerializer,
33+
AirbyteStateMessage,
34+
AirbyteTraceMessage,
35+
ConfiguredAirbyteCatalog,
36+
ConnectorSpecificationSerializer,
37+
TraceType,
38+
Type,
39+
)
40+
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
41+
ConcurrentDeclarativeSource,
42+
)
43+
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
44+
from airbyte_cdk.sources.source import TState
45+
from orjson import orjson
46+
47+
48+
class SourceLocalYaml(YamlDeclarativeSource):
49+
"""
50+
Declarative source defined by a yaml file in the local filesystem
51+
"""
52+
53+
def __init__(
54+
self,
55+
catalog: ConfiguredAirbyteCatalog | None,
56+
config: Mapping[str, Any] | None,
57+
state: TState,
58+
**kwargs: Any,
59+
) -> None:
60+
"""
61+
HACK!
62+
Problem: YamlDeclarativeSource relies on the calling module name/path to find the yaml file.
63+
Implication: If you call YamlDeclarativeSource directly it will look for the yaml file in the wrong place. (e.g. the airbyte-cdk package)
64+
Solution: Subclass YamlDeclarativeSource from the same location as the manifest to load.
65+
66+
When can we remove this?
67+
When the airbyte-cdk is updated to not rely on the calling module name/path to find the yaml file.
68+
When all manifest connectors are updated to use the new airbyte-cdk.
69+
When all manifest connectors are updated to use the source-declarative-manifest as the base image.
70+
"""
71+
super().__init__(
72+
catalog=catalog,
73+
config=config,
74+
state=state,
75+
path_to_yaml="manifest.yaml",
76+
)
77+
78+
79+
def _is_local_manifest_command(args: list[str]) -> bool:
80+
# Check for a local manifest.yaml file
81+
return Path("/airbyte/integration_code/source_declarative_manifest/manifest.yaml").exists()
82+
83+
84+
def handle_command(args: list[str]) -> None:
85+
if _is_local_manifest_command(args):
86+
handle_local_manifest_command(args)
87+
else:
88+
handle_remote_manifest_command(args)
89+
90+
91+
def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
92+
try:
93+
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
94+
return SourceLocalYaml(config=config, catalog=catalog, state=state)
95+
except Exception as error:
96+
print(
97+
orjson.dumps(
98+
AirbyteMessageSerializer.dump(
99+
AirbyteMessage(
100+
type=Type.TRACE,
101+
trace=AirbyteTraceMessage(
102+
type=TraceType.ERROR,
103+
emitted_at=int(datetime.now().timestamp() * 1000),
104+
error=AirbyteErrorTraceMessage(
105+
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
106+
stack_trace=traceback.format_exc(),
107+
),
108+
),
109+
)
110+
)
111+
).decode()
112+
)
113+
raise error
114+
115+
116+
def handle_local_manifest_command(args: list[str]) -> None:
117+
source = _get_local_yaml_source(args)
118+
launch(
119+
source=source,
120+
args=args,
121+
)
122+
123+
124+
def handle_remote_manifest_command(args: list[str]) -> None:
125+
"""Overrides the spec command to return the generalized spec for the declarative manifest source.
126+
127+
This is different from a typical low-code, but built and published separately source built as a ManifestDeclarativeSource,
128+
because that will have a spec method that returns the spec for that specific source. Other than spec,
129+
the generalized connector behaves the same as any other, since the manifest is provided in the config.
130+
"""
131+
if args[0] == "spec":
132+
json_spec = pkgutil.get_data(
133+
"airbyte_cdk.cli.source_declarative_manifest",
134+
"spec.json",
135+
)
136+
if json_spec is None:
137+
raise FileNotFoundError(
138+
"Could not find `spec.json` file for source-declarative-manifest"
139+
)
140+
141+
spec_obj = json.loads(json_spec)
142+
spec = ConnectorSpecificationSerializer.load(spec_obj)
143+
144+
message = AirbyteMessage(type=Type.SPEC, spec=spec)
145+
print(AirbyteEntrypoint.airbyte_message_to_string(message))
146+
else:
147+
source = create_declarative_source(args)
148+
launch(
149+
source=source,
150+
args=args,
151+
)
152+
153+
154+
def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
155+
"""Creates the source with the injected config.
156+
157+
This essentially does what other low-code sources do at build time, but at runtime,
158+
with a user-provided manifest in the config. This better reflects what happens in the
159+
connector builder.
160+
"""
161+
try:
162+
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
163+
if "__injected_declarative_manifest" not in config:
164+
raise ValueError(
165+
f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}"
166+
)
167+
return ConcurrentDeclarativeSource(
168+
config=config,
169+
catalog=catalog,
170+
state=state,
171+
source_config=cast(dict[str, Any], config["__injected_declarative_manifest"]),
172+
)
173+
except Exception as error:
174+
print(
175+
orjson.dumps(
176+
AirbyteMessageSerializer.dump(
177+
AirbyteMessage(
178+
type=Type.TRACE,
179+
trace=AirbyteTraceMessage(
180+
type=TraceType.ERROR,
181+
emitted_at=int(datetime.now().timestamp() * 1000),
182+
error=AirbyteErrorTraceMessage(
183+
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
184+
stack_trace=traceback.format_exc(),
185+
),
186+
),
187+
)
188+
)
189+
).decode()
190+
)
191+
raise error
192+
193+
194+
def _parse_inputs_into_config_catalog_state(
195+
args: list[str],
196+
) -> tuple[
197+
Mapping[str, Any] | None,
198+
ConfiguredAirbyteCatalog | None,
199+
list[AirbyteStateMessage],
200+
]:
201+
parsed_args = AirbyteEntrypoint.parse_args(args)
202+
config = (
203+
ConcurrentDeclarativeSource.read_config(parsed_args.config)
204+
if hasattr(parsed_args, "config")
205+
else None
206+
)
207+
catalog = (
208+
ConcurrentDeclarativeSource.read_catalog(parsed_args.catalog)
209+
if hasattr(parsed_args, "catalog")
210+
else None
211+
)
212+
state = (
213+
ConcurrentDeclarativeSource.read_state(parsed_args.state)
214+
if hasattr(parsed_args, "state")
215+
else []
216+
)
217+
218+
return config, catalog, state
219+
220+
221+
def run() -> None:
222+
args: list[str] = sys.argv[1:]
223+
handle_command(args)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"documentationUrl": "https://docs.airbyte.com/integrations/sources/low-code",
3+
"connectionSpecification": {
4+
"$schema": "http://json-schema.org/draft-07/schema#",
5+
"title": "Low-code source spec",
6+
"type": "object",
7+
"required": ["__injected_declarative_manifest"],
8+
"additionalProperties": true,
9+
"properties": {
10+
"__injected_declarative_manifest": {
11+
"title": "Low-code manifest",
12+
"type": "object",
13+
"description": "The low-code manifest that defines the components of the source."
14+
}
15+
}
16+
}
17+
}

0 commit comments

Comments
 (0)