Skip to content

Commit 3638129

Browse files
committed
Merge branch 'main' into pnilan/feat/extend-spec-class-for-config-migrations
2 parents ded5fcb + d458e8f commit 3638129

22 files changed

+894
-111
lines changed

.coveragerc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,13 @@ omit =
99

1010
# omit as unimplemented
1111
airbyte_cdk/base_python/cdk/streams/auth/jwt.py
12+
13+
# omit temporary files and test files
14+
/tmp/tmp*.py
15+
unit_tests/*
16+
17+
[paths]
18+
# Reconcile file paths
19+
source =
20+
./
21+
/tmp/
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# When a PR is has the auto-merge feature enabled or disabled, this workflow adds or updates
2+
# warning text at the bottom of the PR description.
3+
4+
name: "Add Auto-Merge Notification Text"
5+
on:
6+
pull_request:
7+
types: [auto_merge_enabled, auto_merge_disabled]
8+
9+
jobs:
10+
update-description:
11+
runs-on: ubuntu-latest
12+
permissions:
13+
pull-requests: write
14+
steps:
15+
- name: Add Auto-Merge Notice
16+
if: github.event.action == 'auto_merge_enabled'
17+
uses: bcgov/[email protected]
18+
with:
19+
add_markdown: |
20+
> [!IMPORTANT]
21+
> **Auto-merge enabled.**
22+
>
23+
> _This PR is set to merge automatically when all requirements are met._
24+
25+
- name: Remove Auto-Merge Notice
26+
if: github.event.action == 'auto_merge_disabled'
27+
uses: bcgov/[email protected]
28+
with:
29+
add_markdown: |
30+
> [!NOTE]
31+
> **Auto-merge may have been disabled. Please check the PR status to confirm.**

.pre-commit-config.yaml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@
33
exclude: |
44
(?x)(
55
# Python/system files
6-
^.*/__init__\.py$|
76
^.*?/\.venv/.*$|
87
^.*?/node_modules/.*$|
9-
8+
109
# Generated/test files
1110
^.*?/\.pytest_cache/.*$|
1211
^.*?/__pycache__/.*$|
1312
^.*?/\.mypy_cache/.*$|
14-
^.*?/\.ruff_cache/.*$
13+
^.*?/\.ruff_cache/.*$|
1514
1615
# Package management
1716
^.*?/poetry\.lock$|
@@ -22,7 +21,7 @@ exclude: |
2221
^.*?/build/.*$|
2322
^.*?/dist/.*$|
2423
^.*?/\.coverage$|
25-
^.*?/coverage\.xml$|
24+
^.*?/coverage\.xml$
2625
)
2726
2827
repos:
@@ -38,18 +37,15 @@ repos:
3837
- repo: https://github.com/astral-sh/ruff-pre-commit
3938
rev: v0.11.5
4039
hooks:
41-
# Run the linter with repo-defined settings
4240
- id: ruff
4341
args: [--fix]
44-
45-
# Run the formatter with repo-defined settings
4642
- id: ruff-format
4743

4844
- repo: https://github.com/pre-commit/mirrors-prettier
4945
rev: v3.0.3
5046
hooks:
5147
- id: prettier
52-
types_or: [json, yaml]
48+
args: [--write]
5349
additional_dependencies:
5450
5551

@@ -61,3 +57,20 @@ repos:
6157
language: golang
6258
additional_dependencies: [github.com/google/[email protected]]
6359
files: \.py$
60+
61+
- id: poetry-check
62+
name: Check Poetry lockfile
63+
entry: poetry check
64+
language: system
65+
pass_filenames: false
66+
always_run: true
67+
68+
- repo: https://github.com/pre-commit/mirrors-mypy
69+
rev: v1.8.0
70+
hooks:
71+
- id: mypy
72+
name: mypy
73+
entry: mypy
74+
args: [--config-file=mypy.ini, --show-column-numbers]
75+
files: ^airbyte_cdk/
76+
pass_filenames: true

airbyte_cdk/cli/airbyte_cdk/_connector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"""
4040

4141
import os
42+
import sys
4243
from pathlib import Path
4344
from types import ModuleType
4445

@@ -166,10 +167,11 @@ def test(
166167
click.echo(f"Collect only: {collect_only}")
167168
click.echo(f"Pytest args: {pytest_args}")
168169
click.echo("Invoking Pytest...")
169-
pytest.main(
170+
exit_code = pytest.main(
170171
pytest_args,
171172
plugins=[],
172173
)
174+
sys.exit(exit_code)
173175

174176

175177
__all__ = [

airbyte_cdk/cli/airbyte_cdk/_secrets.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@
4949
resolve_connector_name_and_directory,
5050
)
5151

52-
AIRBYTE_INTERNAL_GCP_PROJECT = "dataline-integration-testing"
52+
GCP_PROJECT_ID: str = os.environ.get("GCP_PROJECT_ID", "") or "dataline-integration-testing"
53+
# We put the `or` outside the `get()` because we want the `GCP_PROJECT_ID`
54+
# env var to be ignored if it contains an empty string, such as in CI where the
55+
# workflow might set it to a value that is itself actually missing or unset.
56+
"""The GCP project ID to use for fetching integration test secrets."""
57+
5358
CONNECTOR_LABEL = "connector"
5459
GLOBAL_MASK_KEYS_URL = "https://connectors.airbyte.com/files/registries/v0/specs_secrets_mask.yaml"
5560

@@ -83,8 +88,11 @@ def secrets_cli_group() -> None:
8388
@click.option(
8489
"--gcp-project-id",
8590
type=str,
86-
default=AIRBYTE_INTERNAL_GCP_PROJECT,
87-
help=f"GCP project ID. Defaults to '{AIRBYTE_INTERNAL_GCP_PROJECT}'.",
91+
default=GCP_PROJECT_ID,
92+
help=(
93+
"GCP project ID for retrieving integration tests credentials. "
94+
"Defaults to the value of the `GCP_PROJECT_ID` environment variable, if set."
95+
),
8896
)
8997
@click.option(
9098
"--print-ci-secrets-masks",
@@ -95,7 +103,7 @@ def secrets_cli_group() -> None:
95103
)
96104
def fetch(
97105
connector: str | Path | None = None,
98-
gcp_project_id: str = AIRBYTE_INTERNAL_GCP_PROJECT,
106+
gcp_project_id: str = GCP_PROJECT_ID,
99107
print_ci_secrets_masks: bool = False,
100108
) -> None:
101109
"""Fetch secrets for a connector from Google Secret Manager.
@@ -192,41 +200,41 @@ def fetch(
192200

193201

194202
@secrets_cli_group.command("list")
195-
@click.option(
196-
"--connector-name",
203+
@click.argument(
204+
"connector",
205+
required=False,
197206
type=str,
198-
help="Name of the connector to fetch secrets for. Ignored if --connector-directory is provided.",
199-
)
200-
@click.option(
201-
"--connector-directory",
202-
type=click.Path(exists=True, file_okay=False, path_type=Path),
203-
help="Path to the connector directory.",
207+
metavar="[CONNECTOR]",
204208
)
205209
@click.option(
206210
"--gcp-project-id",
207211
type=str,
208-
default=AIRBYTE_INTERNAL_GCP_PROJECT,
209-
help=f"GCP project ID. Defaults to '{AIRBYTE_INTERNAL_GCP_PROJECT}'.",
212+
default=GCP_PROJECT_ID,
213+
help=(
214+
"GCP project ID for retrieving integration tests credentials. "
215+
"Defaults to the value of the `GCP_PROJECT_ID` environment variable, if set."
216+
),
210217
)
211218
def list_(
212-
connector_name: str | None = None,
213-
connector_directory: Path | None = None,
214-
gcp_project_id: str = AIRBYTE_INTERNAL_GCP_PROJECT,
219+
connector: str | Path | None = None,
220+
*,
221+
gcp_project_id: str = GCP_PROJECT_ID,
215222
) -> None:
216223
"""List secrets for a connector from Google Secret Manager.
217224
218225
This command fetches secrets for a connector from Google Secret Manager and prints
219226
them as a table.
220227
228+
[CONNECTOR] can be a connector name (e.g. 'source-pokeapi'), a path to a connector directory, or omitted to use the current working directory.
229+
If a string containing '/' is provided, it is treated as a path. Otherwise, it is treated as a connector name.
230+
221231
If no connector name or directory is provided, we will look within the current working
222232
directory. If the current working directory is not a connector directory (e.g. starting
223233
with 'source-') and no connector name or path is provided, the process will fail.
224234
"""
225235
click.echo("Scanning secrets...", err=True)
226236

227-
connector_name = connector_name or resolve_connector_name(
228-
connector_directory=connector_directory or Path().resolve().absolute(),
229-
)
237+
connector_name, _ = resolve_connector_name_and_directory(connector)
230238
secrets: list[Secret] = _fetch_secret_handles( # type: ignore
231239
connector_name=connector_name,
232240
gcp_project_id=gcp_project_id,
@@ -303,7 +311,7 @@ def _get_secret_url(secret_name: str, gcp_project_id: str) -> str:
303311

304312
def _fetch_secret_handles(
305313
connector_name: str,
306-
gcp_project_id: str = AIRBYTE_INTERNAL_GCP_PROJECT,
314+
gcp_project_id: str = GCP_PROJECT_ID,
307315
) -> list["Secret"]: # type: ignore
308316
"""Fetch secrets from Google Secret Manager."""
309317
if not secretmanager:

airbyte_cdk/cli/source_declarative_manifest/_run.py

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,17 @@
1616

1717
from __future__ import annotations
1818

19+
import argparse
1920
import json
2021
import pkgutil
2122
import sys
2223
import traceback
23-
from collections.abc import Mapping
24+
from collections.abc import MutableMapping
2425
from pathlib import Path
2526
from typing import Any, cast
2627

2728
import orjson
29+
import yaml
2830

2931
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
3032
from airbyte_cdk.models import (
@@ -54,7 +56,7 @@ class SourceLocalYaml(YamlDeclarativeSource):
5456
def __init__(
5557
self,
5658
catalog: ConfiguredAirbyteCatalog | None,
57-
config: Mapping[str, Any] | None,
59+
config: MutableMapping[str, Any] | None,
5860
state: TState,
5961
**kwargs: Any,
6062
) -> None:
@@ -91,7 +93,8 @@ def handle_command(args: list[str]) -> None:
9193

9294
def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
9395
try:
94-
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
96+
parsed_args = AirbyteEntrypoint.parse_args(args)
97+
config, catalog, state = _parse_inputs_into_config_catalog_state(parsed_args)
9598
return SourceLocalYaml(config=config, catalog=catalog, state=state)
9699
except Exception as error:
97100
print(
@@ -162,21 +165,40 @@ def create_declarative_source(
162165
connector builder.
163166
"""
164167
try:
165-
config: Mapping[str, Any] | None
168+
config: MutableMapping[str, Any] | None
166169
catalog: ConfiguredAirbyteCatalog | None
167170
state: list[AirbyteStateMessage]
168-
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
169-
if config is None or "__injected_declarative_manifest" not in config:
171+
172+
parsed_args = AirbyteEntrypoint.parse_args(args)
173+
config, catalog, state = _parse_inputs_into_config_catalog_state(parsed_args)
174+
175+
if config is None:
176+
raise ValueError(
177+
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
178+
"of the config or using the --manifest-path argument."
179+
)
180+
181+
# If a manifest_path is provided in the args, inject it into the config
182+
if hasattr(parsed_args, "manifest_path") and parsed_args.manifest_path:
183+
injected_manifest = _parse_manifest_from_file(parsed_args.manifest_path)
184+
if injected_manifest:
185+
config["__injected_declarative_manifest"] = injected_manifest
186+
187+
if "__injected_declarative_manifest" not in config:
170188
raise ValueError(
171189
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
172-
f"of the config but config only has keys: {list(config.keys() if config else [])}"
190+
"of the config or using the --manifest-path argument. "
191+
f"Config only has keys: {list(config.keys() if config else [])}"
173192
)
174193
if not isinstance(config["__injected_declarative_manifest"], dict):
175194
raise ValueError(
176195
"Invalid config: `__injected_declarative_manifest` should be a dictionary, "
177196
f"but got type: {type(config['__injected_declarative_manifest'])}"
178197
)
179198

199+
if hasattr(parsed_args, "components_path") and parsed_args.components_path:
200+
_register_components_from_file(parsed_args.components_path)
201+
180202
return ConcurrentDeclarativeSource(
181203
config=config,
182204
catalog=catalog,
@@ -205,13 +227,12 @@ def create_declarative_source(
205227

206228

207229
def _parse_inputs_into_config_catalog_state(
208-
args: list[str],
230+
parsed_args: argparse.Namespace,
209231
) -> tuple[
210-
Mapping[str, Any] | None,
232+
MutableMapping[str, Any] | None,
211233
ConfiguredAirbyteCatalog | None,
212234
list[AirbyteStateMessage],
213235
]:
214-
parsed_args = AirbyteEntrypoint.parse_args(args)
215236
config = (
216237
ConcurrentDeclarativeSource.read_config(parsed_args.config)
217238
if hasattr(parsed_args, "config")
@@ -231,6 +252,44 @@ def _parse_inputs_into_config_catalog_state(
231252
return config, catalog, state
232253

233254

255+
def _parse_manifest_from_file(filepath: str) -> dict[str, Any] | None:
256+
"""Extract and parse a manifest file specified in the args."""
257+
try:
258+
with open(filepath, "r", encoding="utf-8") as manifest_file:
259+
manifest_content = yaml.safe_load(manifest_file)
260+
if manifest_content is None:
261+
raise ValueError(f"Manifest file at {filepath} is empty")
262+
if not isinstance(manifest_content, dict):
263+
raise ValueError(f"Manifest must be a dictionary, got {type(manifest_content)}")
264+
return manifest_content
265+
except Exception as error:
266+
raise ValueError(f"Failed to load manifest file from {filepath}: {error}")
267+
268+
269+
def _register_components_from_file(filepath: str) -> None:
270+
"""Load and register components from a Python file specified in the args."""
271+
import importlib.util
272+
import sys
273+
274+
components_path = Path(filepath)
275+
276+
module_name = "components"
277+
sdm_module_name = "source_declarative_manifest.components"
278+
279+
# Create module spec
280+
spec = importlib.util.spec_from_file_location(module_name, components_path)
281+
if spec is None or spec.loader is None:
282+
raise ImportError(f"Could not load module from {components_path}")
283+
284+
# Create module and execute code, registering the module before executing its code
285+
# To avoid issues with dataclasses that look up the module
286+
module = importlib.util.module_from_spec(spec)
287+
sys.modules[module_name] = module
288+
sys.modules[sdm_module_name] = module
289+
290+
spec.loader.exec_module(module)
291+
292+
234293
def run() -> None:
235294
args: list[str] = sys.argv[1:]
236295
handle_command(args)

0 commit comments

Comments
 (0)