Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a488ab3
deduplication version 1
Mar 26, 2025
7d910ee
deduplication version 2
Mar 26, 2025
691d16a
updated duplicates collection
Mar 27, 2025
081e7a8
deduplicate most frequent tags, use existing refs if definitions.shar…
Mar 31, 2025
180af86
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Mar 31, 2025
138b607
formatted"
Mar 31, 2025
f10e601
updated to account type for the given duplicated key
Mar 31, 2025
66fe38e
add the reduce_commons: true, for Connector Builder case
Mar 31, 2025
8798042
enabled the reduce_commons: True for Connector Builder case
Mar 31, 2025
1d425ee
refactorred and cleaned up the code, moved to use the class instead
Apr 1, 2025
06b183a
formatted
Apr 1, 2025
1fa891c
formatted
Apr 1, 2025
00e31a7
cleaned up
Apr 1, 2025
a5aba82
added the dedicated tests
Apr 1, 2025
e017e92
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 1, 2025
0e8394f
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 2, 2025
9f7d498
formatted
Apr 2, 2025
6ec240a
updated normalizer
Apr 8, 2025
acdecdb
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 8, 2025
5f5c6b1
attempt to fix the Connector Builder tests
Apr 8, 2025
e97afa5
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 11, 2025
be3bab1
revert test
Apr 11, 2025
748892d
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 15, 2025
b10d7a1
removed post_resolve_manifest flag
Apr 15, 2025
0587481
nit
Apr 15, 2025
d929167
add _-should_normalize flag handling
Apr 17, 2025
3859c5b
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 17, 2025
9de27ef
formatted
Apr 17, 2025
c403a0e
rename sharable > linkable, shared > linked
Apr 17, 2025
297ae37
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 19, 2025
38f7da6
updated the order of operations; normalization should go after pre-pr…
Apr 19, 2025
7d71f4b
fixed
Apr 19, 2025
304235c
add schema extraction + unit test
Apr 21, 2025
348aaae
Merge branch 'main' into baz/cdk/extract-common-manifest-parts
Apr 23, 2025
2c8d164
updated test comments
Apr 24, 2025
2010419
Merge remote-tracking branch 'origin/main' into baz/cdk/extract-commo…
Apr 25, 2025
8d7be4e
updated linked
Apr 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,7 @@ definitions:
title: URL Base
description: The base URL (scheme and host, e.g. "https://api.example.com") to match.
type: string
sharable: True
url_path_pattern:
title: URL Path Pattern
description: A regular expression pattern to match the URL path.
Expand Down Expand Up @@ -1841,6 +1842,7 @@ definitions:
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
- "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
sharable: True
path:
title: URL Path
description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
Expand Down Expand Up @@ -1872,6 +1874,7 @@ definitions:
- "$ref": "#/definitions/SessionTokenAuthenticator"
- "$ref": "#/definitions/LegacySessionTokenAuthenticator"
- "$ref": "#/definitions/SelectiveAuthenticator"
sharable: True
error_handler:
title: Error Handler
description: Error handler component that defines how to handle errors.
Expand Down
49 changes: 32 additions & 17 deletions airbyte_cdk/sources/declarative/manifest_declarative_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
ManifestComponentTransformer,
)
from airbyte_cdk.sources.declarative.parsers.manifest_normalizer import (
ManifestNormalizer,
)
from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import (
ManifestReferenceResolver,
)
Expand All @@ -57,6 +60,24 @@
from airbyte_cdk.utils.traced_exception import AirbyteTracedException


def _get_declarative_component_schema() -> Dict[str, Any]:
try:
raw_component_schema = pkgutil.get_data(
"airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
)
if raw_component_schema is not None:
declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader)
return declarative_component_schema # type: ignore
else:
raise RuntimeError(
"Failed to read manifest component json schema required for deduplication"
)
except FileNotFoundError as e:
raise FileNotFoundError(
f"Failed to read manifest component json schema required for deduplication: {e}"
)


class ManifestDeclarativeSource(DeclarativeSource):
"""Declarative source defined by a manifest of low-code components that define source connector behavior"""

Expand All @@ -78,6 +99,8 @@ def __init__(
component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
"""
self.logger = logging.getLogger(f"airbyte.{self.name}")

self._declarative_component_schema = _get_declarative_component_schema()
# For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
manifest = dict(source_config)
if "type" not in manifest:
Expand All @@ -87,6 +110,14 @@ def __init__(
self.components_module: ModuleType | None = get_registered_components_module(config=config)

resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)

if emit_connector_builder_messages:
# reduce commonalities in the manifest after the references have been resolved,
# used mostly for Connector Builder use cases.
resolved_source_config = ManifestNormalizer(
resolved_source_config, self._declarative_component_schema
).normalize()

propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
"", resolved_source_config, {}
)
Expand Down Expand Up @@ -266,22 +297,6 @@ def _validate_source(self) -> None:
"""
Validates the connector manifest against the declarative component schema
"""
try:
raw_component_schema = pkgutil.get_data(
"airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
)
if raw_component_schema is not None:
declarative_component_schema = yaml.load(
raw_component_schema, Loader=yaml.SafeLoader
)
else:
raise RuntimeError(
"Failed to read manifest component json schema required for validation"
)
except FileNotFoundError as e:
raise FileNotFoundError(
f"Failed to read manifest component json schema required for validation: {e}"
)

streams = self._source_config.get("streams")
dynamic_streams = self._source_config.get("dynamic_streams")
Expand All @@ -291,7 +306,7 @@ def _validate_source(self) -> None:
)

try:
validate(self._source_config, declarative_component_schema)
validate(self._source_config, self._declarative_component_schema)
except ValidationError as e:
raise ValidationError(
"Validation against json schema defined in declarative_component_schema.yaml schema failed"
Expand Down
9 changes: 9 additions & 0 deletions airbyte_cdk/sources/declarative/parsers/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,12 @@ class UndefinedReferenceException(Exception):

def __init__(self, path: str, reference: str) -> None:
super().__init__(f"Undefined reference {reference} from {path}")


class ManifestNormalizationException(Exception):
    """
    Signals a failure while normalizing (deduplicating) a manifest.

    The message passed in is prefixed with "Failed to deduplicate manifest: "
    before being handed to the base ``Exception``.
    """

    def __init__(self, message: str) -> None:
        super().__init__(f"Failed to deduplicate manifest: {message}")
Loading
Loading