From cdd1ac908417c35322638bd90e9c6a03f070bb59 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 22:33:16 +0000 Subject: [PATCH 01/21] feat: skip config validation during discovery for sources with DynamicSchemaLoader Co-Authored-By: Aaron Steers --- CHANGELOG.md | 4 ++ .../manifest_declarative_source.py | 35 +++++++++- ...ifest_declarative_source_dynamic_schema.py | 69 +++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 31e73e411..a9633c7a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Newer updates can be found here: [GitHub Release Notes](https://github.com/airby # Changelog +## Unreleased + +- Added automatic detection of DynamicSchemaLoader to skip config validation during discovery + ## 6.5.2 bugfix: Ensure that streams with partition router are not executed concurrently diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index cfd258c6c..20d52e99a 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -58,7 +58,12 @@ class ManifestDeclarativeSource(DeclarativeSource): - """Declarative source defined by a manifest of low-code components that define source connector behavior""" + """Declarative source defined by a manifest of low-code components that define source connector behavior + + If any stream in the source uses a DynamicSchemaLoader, config validation will be skipped during + discovery. This allows sources with dynamic schemas to run discovery without requiring authentication + when the schema endpoint doesn't need auth to provide catalog information. + """ def __init__( self, @@ -108,6 +113,8 @@ def __init__( self._config = config or {} self._validate_source() + + self.check_config_against_spec = not self._uses_dynamic_schema_loader() @property def resolved_manifest(self) -> Mapping[str, Any]: @@ -440,3 +447,29 @@ def _dynamic_stream_configs( def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None: self.logger.debug("declarative source created from manifest", extra=extra_args) + + def _uses_dynamic_schema_loader(self) -> bool: + """ + Determines if any stream in the source uses a DynamicSchemaLoader. + + DynamicSchemaLoader makes a separate call to retrieve schema information, + which might not require authentication, so we can skip config validation + during discovery when it's used. + + Returns: + bool: True if any stream uses a DynamicSchemaLoader, False otherwise. + """ + for stream_config in self._stream_configs(self._source_config): + schema_loader = stream_config.get("schema_loader", {}) + if isinstance(schema_loader, dict) and schema_loader.get("type") == "DynamicSchemaLoader": + return True + + dynamic_streams = self._source_config.get("dynamic_streams", []) + if dynamic_streams: + for dynamic_stream in dynamic_streams: + stream_template = dynamic_stream.get("stream_template", {}) + schema_loader = stream_template.get("schema_loader", {}) + if isinstance(schema_loader, dict) and schema_loader.get("type") == "DynamicSchemaLoader": + return True + + return False diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py new file mode 100644 index 000000000..94231b1db --- /dev/null +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -0,0 +1,69 @@ +# +# + +from unittest.mock import MagicMock + +import pytest + +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource + + +def test_check_config_against_spec_with_dynamic_schema_loader(): + """Test that check_config_against_spec is False when DynamicSchemaLoader is used.""" + source_config = { + "type": "DeclarativeSource", + "check": {"type": "CheckStream"}, + "streams": [ + { + "name": "test_stream", + "schema_loader": { + "type": "DynamicSchemaLoader", + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + }, + "schema_type_identifier": { + "key_pointer": ["name"], + } + }, + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + } + } + ], + "version": "0.1.0" + } + + source = ManifestDeclarativeSource(source_config=source_config) + + assert source.check_config_against_spec is False + + +def test_check_config_against_spec_without_dynamic_schema_loader(): + """Test that check_config_against_spec is True when DynamicSchemaLoader is not used.""" + source_config = { + "type": "DeclarativeSource", + "check": {"type": "CheckStream"}, + "streams": [ + { + "name": "test_stream", + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {} + }, + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + } + } + ], + "version": "0.1.0" + } + + source = ManifestDeclarativeSource(source_config=source_config) + + assert source.check_config_against_spec is True From 7490ad1f9874bf98c6eca2b38a117dc819191e0b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 22:35:01 +0000 Subject: [PATCH 02/21] style: fix formatting issues Co-Authored-By: Aaron Steers --- .../manifest_declarative_source.py | 24 ++++++++++++------- ...ifest_declarative_source_dynamic_schema.py | 23 ++++++++---------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 20d52e99a..834356bff 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -59,7 +59,7 @@ class ManifestDeclarativeSource(DeclarativeSource): """Declarative source defined by a manifest of low-code components that define source connector behavior - + If any stream in the source uses a DynamicSchemaLoader, config validation will be skipped during discovery. This allows sources with dynamic schemas to run discovery without requiring authentication when the schema endpoint doesn't need auth to provide catalog information. @@ -113,7 +113,7 @@ def __init__( self._config = config or {} self._validate_source() - + self.check_config_against_spec = not self._uses_dynamic_schema_loader() @property @@ -447,29 +447,35 @@ def _dynamic_stream_configs( def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None: self.logger.debug("declarative source created from manifest", extra=extra_args) - + def _uses_dynamic_schema_loader(self) -> bool: """ Determines if any stream in the source uses a DynamicSchemaLoader. - + DynamicSchemaLoader makes a separate call to retrieve schema information, which might not require authentication, so we can skip config validation during discovery when it's used. - + Returns: bool: True if any stream uses a DynamicSchemaLoader, False otherwise. """ for stream_config in self._stream_configs(self._source_config): schema_loader = stream_config.get("schema_loader", {}) - if isinstance(schema_loader, dict) and schema_loader.get("type") == "DynamicSchemaLoader": + if ( + isinstance(schema_loader, dict) + and schema_loader.get("type") == "DynamicSchemaLoader" + ): return True - + dynamic_streams = self._source_config.get("dynamic_streams", []) if dynamic_streams: for dynamic_stream in dynamic_streams: stream_template = dynamic_stream.get("stream_template", {}) schema_loader = stream_template.get("schema_loader", {}) - if isinstance(schema_loader, dict) and schema_loader.get("type") == "DynamicSchemaLoader": + if ( + isinstance(schema_loader, dict) + and schema_loader.get("type") == "DynamicSchemaLoader" + ): return True - + return False diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index 94231b1db..82f4bbb63 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -25,20 +25,20 @@ def test_check_config_against_spec_with_dynamic_schema_loader(): }, "schema_type_identifier": { "key_pointer": ["name"], - } + }, }, "retriever": { "type": "SimpleRetriever", "requester": {"url_base": "https://example.com", "http_method": "GET"}, "record_selector": {"extractor": {"field_path": []}}, - } + }, } ], - "version": "0.1.0" + "version": "0.1.0", } - + source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_against_spec is False @@ -50,20 +50,17 @@ def test_check_config_against_spec_without_dynamic_schema_loader(): "streams": [ { "name": "test_stream", - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {} - }, + "schema_loader": {"type": "InlineSchemaLoader", "schema": {}}, "retriever": { "type": "SimpleRetriever", "requester": {"url_base": "https://example.com", "http_method": "GET"}, "record_selector": {"extractor": {"field_path": []}}, - } + }, } ], - "version": "0.1.0" + "version": "0.1.0", } - + source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_against_spec is True From 9e84e1c7d4c8d9d495604299ec57dbce6b682f9a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:21:26 +0000 Subject: [PATCH 03/21] fix: update entrypoint to make --config optional for discovery Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 59 ++++++----- ...ifest_declarative_source_dynamic_schema.py | 97 ++++++++++++++++++- unit_tests/test_entrypoint.py | 3 +- 3 files changed, 131 insertions(+), 28 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 0a13cfebe..3abe26d03 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -93,7 +93,7 @@ def parse_args(args: List[str]) -> argparse.Namespace: ) required_discover_parser = discover_parser.add_argument_group("required named arguments") required_discover_parser.add_argument( - "--config", type=str, required=True, help="path to the json configuration file" + "--config", type=str, required=False, help="path to the json configuration file" ) # read @@ -147,33 +147,44 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: ] yield self.airbyte_message_to_string(message) else: - raw_config = self.source.read_config(parsed_args.config) - config = self.source.configure(raw_config, temp_dir) - - yield from [ - self.airbyte_message_to_string(queued_message) - for queued_message in self._emit_queued_messages(self.source) - ] - if cmd == "check": - yield from map( - AirbyteEntrypoint.airbyte_message_to_string, - self.check(source_spec, config), - ) - elif cmd == "discover": - yield from map( - AirbyteEntrypoint.airbyte_message_to_string, - self.discover(source_spec, config), - ) - elif cmd == "read": - config_catalog = self.source.read_catalog(parsed_args.catalog) - state = self.source.read_state(parsed_args.state) - + if cmd == "discover" and not parsed_args.config and not self.source.check_config_against_spec: + empty_config = {} + yield from [ + self.airbyte_message_to_string(queued_message) + for queued_message in self._emit_queued_messages(self.source) + ] yield from map( AirbyteEntrypoint.airbyte_message_to_string, - self.read(source_spec, config, config_catalog, state), + self.discover(source_spec, empty_config), ) else: - raise Exception("Unexpected command " + cmd) + raw_config = self.source.read_config(parsed_args.config) + config = self.source.configure(raw_config, temp_dir) + + yield from [ + self.airbyte_message_to_string(queued_message) + for queued_message in self._emit_queued_messages(self.source) + ] + if cmd == "check": + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.check(source_spec, config), + ) + elif cmd == "discover": + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.discover(source_spec, config), + ) + elif cmd == "read": + config_catalog = self.source.read_catalog(parsed_args.catalog) + state = self.source.read_state(parsed_args.state) + + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.read(source_spec, config, config_catalog, state), + ) + else: + raise Exception("Unexpected command " + cmd) finally: yield from [ self.airbyte_message_to_string(queued_message) diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index 82f4bbb63..ae60eb677 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -1,11 +1,12 @@ # # -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest - +from airbyte_cdk.models import AirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit def test_check_config_against_spec_with_dynamic_schema_loader(): @@ -63,4 +64,96 @@ def test_check_config_against_spec_without_dynamic_schema_loader(): source = ManifestDeclarativeSource(source_config=source_config) + +@patch("airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams") +def test_discover_with_dynamic_schema_loader_no_config(mock_streams): + """Test that discovery works without config when DynamicSchemaLoader is used.""" + mock_stream = MagicMock() + mock_stream.name = "test_dynamic_stream" + + mock_airbyte_stream = MagicMock() + type(mock_airbyte_stream).name = "test_dynamic_stream" + mock_stream.as_airbyte_stream.return_value = mock_airbyte_stream + + mock_streams.return_value = [mock_stream] + + source_config = { + "type": "DeclarativeSource", + "check": {"type": "CheckStream"}, + "streams": [ + { + "name": "test_dynamic_stream", + "schema_loader": { + "type": "DynamicSchemaLoader", + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + }, + "schema_type_identifier": { + "key_pointer": ["name"], + }, + }, + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + }, + } + ], + "version": "0.1.0", + } + + source = ManifestDeclarativeSource(source_config=source_config) + + assert source.check_config_against_spec is False + + logger = MagicMock() + catalog = source.discover(logger, {}) + + assert isinstance(catalog, AirbyteCatalog) + assert len(catalog.streams) == 1 + assert catalog.streams[0].name == "test_dynamic_stream" + + +@patch("airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams") +def test_discover_without_dynamic_schema_loader_no_config(mock_streams): + """Test that discovery validates config when DynamicSchemaLoader is not used.""" + mock_stream = MagicMock() + mock_stream.name = "test_static_stream" + + mock_airbyte_stream = MagicMock() + type(mock_airbyte_stream).name = "test_static_stream" + mock_stream.as_airbyte_stream.return_value = mock_airbyte_stream + + mock_streams.return_value = [mock_stream] + + source_config = { + "type": "DeclarativeSource", + "check": {"type": "CheckStream"}, + "streams": [ + { + "name": "test_static_stream", + "schema_loader": {"type": "InlineSchemaLoader", "schema": {}}, + "retriever": { + "type": "SimpleRetriever", + "requester": {"url_base": "https://example.com", "http_method": "GET"}, + "record_selector": {"extractor": {"field_path": []}}, + }, + } + ], + "version": "0.1.0", + } + + source = ManifestDeclarativeSource(source_config=source_config) + + assert source.check_config_against_spec is True + + logger = MagicMock() + catalog = source.discover(logger, {}) + + assert isinstance(catalog, AirbyteCatalog) + assert len(catalog.streams) == 1 + assert catalog.streams[0].name == "test_static_stream" + assert source.check_config_against_spec is True diff --git a/unit_tests/test_entrypoint.py b/unit_tests/test_entrypoint.py index e906e8b39..6a5c9ce13 100644 --- a/unit_tests/test_entrypoint.py +++ b/unit_tests/test_entrypoint.py @@ -172,14 +172,13 @@ def test_parse_valid_args( ["cmd", "args"], [ ("check", {"config": "config_path"}), - ("discover", {"config": "config_path"}), ("read", {"config": "config_path", "catalog": "catalog_path"}), ], ) def test_parse_missing_required_args( cmd: str, args: MutableMapping[str, Any], entrypoint: AirbyteEntrypoint ): - required_args = {"check": ["config"], "discover": ["config"], "read": ["config", "catalog"]} + required_args = {"check": ["config"], "read": ["config", "catalog"]} for required_arg in required_args[cmd]: argcopy = deepcopy(args) del argcopy[required_arg] From 47bd67c40c53fc240dd795f2982309c2f5ffef93 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:23:08 +0000 Subject: [PATCH 04/21] style: fix formatting issues Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 6 ++- ...ifest_declarative_source_dynamic_schema.py | 37 +++++++++++-------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 3abe26d03..bb5f87631 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -147,7 +147,11 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: ] yield self.airbyte_message_to_string(message) else: - if cmd == "discover" and not parsed_args.config and not self.source.check_config_against_spec: + if ( + cmd == "discover" + and not parsed_args.config + and not self.source.check_config_against_spec + ): empty_config = {} yield from [ self.airbyte_message_to_string(queued_message) diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index ae60eb677..c39af6fd2 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock, patch import pytest + from airbyte_cdk.models import AirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit @@ -65,18 +66,20 @@ def test_check_config_against_spec_without_dynamic_schema_loader(): source = ManifestDeclarativeSource(source_config=source_config) -@patch("airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams") +@patch( + "airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams" +) def test_discover_with_dynamic_schema_loader_no_config(mock_streams): """Test that discovery works without config when DynamicSchemaLoader is used.""" mock_stream = MagicMock() mock_stream.name = "test_dynamic_stream" - + mock_airbyte_stream = MagicMock() type(mock_airbyte_stream).name = "test_dynamic_stream" mock_stream.as_airbyte_stream.return_value = mock_airbyte_stream - + mock_streams.return_value = [mock_stream] - + source_config = { "type": "DeclarativeSource", "check": {"type": "CheckStream"}, @@ -103,31 +106,33 @@ def test_discover_with_dynamic_schema_loader_no_config(mock_streams): ], "version": "0.1.0", } - + source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_against_spec is False - + logger = MagicMock() catalog = source.discover(logger, {}) - + assert isinstance(catalog, AirbyteCatalog) assert len(catalog.streams) == 1 assert catalog.streams[0].name == "test_dynamic_stream" -@patch("airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams") +@patch( + "airbyte_cdk.sources.declarative.manifest_declarative_source.ManifestDeclarativeSource.streams" +) def test_discover_without_dynamic_schema_loader_no_config(mock_streams): """Test that discovery validates config when DynamicSchemaLoader is not used.""" mock_stream = MagicMock() mock_stream.name = "test_static_stream" - + mock_airbyte_stream = MagicMock() type(mock_airbyte_stream).name = "test_static_stream" mock_stream.as_airbyte_stream.return_value = mock_airbyte_stream - + mock_streams.return_value = [mock_stream] - + source_config = { "type": "DeclarativeSource", "check": {"type": "CheckStream"}, @@ -144,14 +149,14 @@ def test_discover_without_dynamic_schema_loader_no_config(mock_streams): ], "version": "0.1.0", } - + source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_against_spec is True - + logger = MagicMock() catalog = source.discover(logger, {}) - + assert isinstance(catalog, AirbyteCatalog) assert len(catalog.streams) == 1 assert catalog.streams[0].name == "test_static_stream" From 36d7f1fa3f97c4554f9b18b1fab7c2361ec096e9 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:25:35 +0000 Subject: [PATCH 05/21] fix: add type annotation for empty_config Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index bb5f87631..be3421606 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -152,7 +152,7 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: and not parsed_args.config and not self.source.check_config_against_spec ): - empty_config = {} + empty_config: dict[str, Any] = {} yield from [ self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source) From b002218bc825a4dc675d9ee9b29dd5319916cd22 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:31:22 +0000 Subject: [PATCH 06/21] refactor: use generator comprehension instead of list comprehension Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 79 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index be3421606..9df1a23a0 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -141,54 +141,53 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: ) if cmd == "spec": message = AirbyteMessage(type=Type.SPEC, spec=source_spec) - yield from [ + yield from ( self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source) - ] + ) yield self.airbyte_message_to_string(message) + elif ( + cmd == "discover" + and not parsed_args.config + and not self.source.check_config_against_spec + ): + empty_config: dict[str, Any] = {} + yield from ( + self.airbyte_message_to_string(queued_message) + for queued_message in self._emit_queued_messages(self.source) + ) + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.discover(source_spec, empty_config), + ) else: - if ( - cmd == "discover" - and not parsed_args.config - and not self.source.check_config_against_spec - ): - empty_config: dict[str, Any] = {} - yield from [ - self.airbyte_message_to_string(queued_message) - for queued_message in self._emit_queued_messages(self.source) - ] + raw_config = self.source.read_config(parsed_args.config) + config = self.source.configure(raw_config, temp_dir) + + yield from ( + self.airbyte_message_to_string(queued_message) + for queued_message in self._emit_queued_messages(self.source) + ) + if cmd == "check": + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.check(source_spec, config), + ) + elif cmd == "discover": + yield from map( + AirbyteEntrypoint.airbyte_message_to_string, + self.discover(source_spec, config), + ) + elif cmd == "read": + config_catalog = self.source.read_catalog(parsed_args.catalog) + state = self.source.read_state(parsed_args.state) + yield from map( AirbyteEntrypoint.airbyte_message_to_string, - self.discover(source_spec, empty_config), + self.read(source_spec, config, config_catalog, state), ) else: - raw_config = self.source.read_config(parsed_args.config) - config = self.source.configure(raw_config, temp_dir) - - yield from [ - self.airbyte_message_to_string(queued_message) - for queued_message in self._emit_queued_messages(self.source) - ] - if cmd == "check": - yield from map( - AirbyteEntrypoint.airbyte_message_to_string, - self.check(source_spec, config), - ) - elif cmd == "discover": - yield from map( - AirbyteEntrypoint.airbyte_message_to_string, - self.discover(source_spec, config), - ) - elif cmd == "read": - config_catalog = self.source.read_catalog(parsed_args.catalog) - state = self.source.read_state(parsed_args.state) - - yield from map( - AirbyteEntrypoint.airbyte_message_to_string, - self.read(source_spec, config, config_catalog, state), - ) - else: - raise Exception("Unexpected command " + cmd) + raise Exception("Unexpected command " + cmd) finally: yield from [ self.airbyte_message_to_string(queued_message) From acbab7e95b2af768d5b1ff4cd0f2b424fc32c70e Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 16:34:19 -0700 Subject: [PATCH 07/21] Update airbyte_cdk/entrypoint.py --- airbyte_cdk/entrypoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 9df1a23a0..a91e20f0a 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -151,6 +151,7 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: and not parsed_args.config and not self.source.check_config_against_spec ): + # Connector supports unprivileged discover empty_config: dict[str, Any] = {} yield from ( self.airbyte_message_to_string(queued_message) From d33dcdd37d952b53f5d2042daa207b03c4320acc Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 16:40:44 -0700 Subject: [PATCH 08/21] Update CHANGELOG.md --- CHANGELOG.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9633c7a3..31e73e411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,6 @@ Newer updates can be found here: [GitHub Release Notes](https://github.com/airby # Changelog -## Unreleased - -- Added automatic detection of DynamicSchemaLoader to skip config validation during discovery - ## 6.5.2 bugfix: Ensure that streams with partition router are not executed concurrently From 4253f2802c8d5f8e40962523a1f3fba36947fd43 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 16:42:14 -0700 Subject: [PATCH 09/21] Update airbyte_cdk/sources/declarative/manifest_declarative_source.py --- .../sources/declarative/manifest_declarative_source.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 834356bff..b835079b4 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -58,12 +58,7 @@ class ManifestDeclarativeSource(DeclarativeSource): - """Declarative source defined by a manifest of low-code components that define source connector behavior - - If any stream in the source uses a DynamicSchemaLoader, config validation will be skipped during - discovery. This allows sources with dynamic schemas to run discovery without requiring authentication - when the schema endpoint doesn't need auth to provide catalog information. - """ + """Declarative source defined by a manifest of low-code components that define source connector behavior""" def __init__( self, From 64610b98a4d8b805fe69e42f722712c934e8414a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:50:22 +0000 Subject: [PATCH 10/21] feat: add check_config_during_discover flag for targeted config validation control Co-Authored-By: Aaron Steers --- CHANGELOG.md | 4 ++++ airbyte_cdk/entrypoint.py | 5 +++-- .../manifest_declarative_source.py | 3 ++- ...ifest_declarative_source_dynamic_schema.py | 19 +++++++++++++------ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31e73e411..d644c24fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Newer updates can be found here: [GitHub Release Notes](https://github.com/airby # Changelog +## Unreleased + +- Added `check_config_during_discover` flag to declarative sources to skip config validation during discovery for sources with DynamicSchemaLoader + ## 6.5.2 bugfix: Ensure that streams with partition router are not executed concurrently diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index a91e20f0a..f5bcf3a6b 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -149,7 +149,8 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: elif ( cmd == "discover" and not parsed_args.config - and not self.source.check_config_against_spec + and hasattr(self.source, "check_config_during_discover") + and self.source.check_config_during_discover ): # Connector supports unprivileged discover empty_config: dict[str, Any] = {} @@ -240,7 +241,7 @@ def discover( self, source_spec: ConnectorSpecification, config: TConfig ) -> Iterable[AirbyteMessage]: self.set_up_secret_filter(config, source_spec.connectionSpecification) - if self.source.check_config_against_spec: + if not hasattr(self.source, "check_config_during_discover") or not self.source.check_config_during_discover: self.validate_connection(source_spec, config) catalog = self.source.discover(self.logger, config) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index b835079b4..8798cd286 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -109,7 +109,8 @@ def __init__( self._config = config or {} self._validate_source() - self.check_config_against_spec = not self._uses_dynamic_schema_loader() + self.check_config_during_discover = self._uses_dynamic_schema_loader() + self.check_config_against_spec = True @property def resolved_manifest(self) -> Mapping[str, Any]: diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index c39af6fd2..bcd051a71 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -10,8 +10,8 @@ from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit -def test_check_config_against_spec_with_dynamic_schema_loader(): - """Test that check_config_against_spec is False when DynamicSchemaLoader is used.""" +def test_check_config_during_discover_with_dynamic_schema_loader(): + """Test that check_config_during_discover is True when DynamicSchemaLoader is used.""" source_config = { "type": "DeclarativeSource", "check": {"type": "CheckStream"}, @@ -41,11 +41,12 @@ def test_check_config_against_spec_with_dynamic_schema_loader(): source = ManifestDeclarativeSource(source_config=source_config) - assert source.check_config_against_spec is False + assert source.check_config_during_discover is True + assert source.check_config_against_spec is True -def test_check_config_against_spec_without_dynamic_schema_loader(): - """Test that check_config_against_spec is True when DynamicSchemaLoader is not used.""" +def test_check_config_during_discover_without_dynamic_schema_loader(): + """Test that check_config_during_discover is False when DynamicSchemaLoader is not used.""" source_config = { "type": "DeclarativeSource", "check": {"type": "CheckStream"}, @@ -64,6 +65,9 @@ def test_check_config_against_spec_without_dynamic_schema_loader(): } source = ManifestDeclarativeSource(source_config=source_config) + + assert source.check_config_during_discover is False + assert source.check_config_against_spec is True @patch( @@ -109,7 +113,8 @@ def test_discover_with_dynamic_schema_loader_no_config(mock_streams): source = ManifestDeclarativeSource(source_config=source_config) - assert source.check_config_against_spec is False + assert source.check_config_during_discover is True + assert source.check_config_against_spec is True logger = MagicMock() catalog = source.discover(logger, {}) @@ -152,6 +157,7 @@ def test_discover_without_dynamic_schema_loader_no_config(mock_streams): source = ManifestDeclarativeSource(source_config=source_config) + assert source.check_config_during_discover is False assert source.check_config_against_spec is True logger = MagicMock() @@ -161,4 +167,5 @@ def test_discover_without_dynamic_schema_loader_no_config(mock_streams): assert len(catalog.streams) == 1 assert catalog.streams[0].name == "test_static_stream" + assert source.check_config_during_discover is False assert source.check_config_against_spec is True From b228857c6fe4ff0a54dc9a20c40179f0c273da44 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:51:42 +0000 Subject: [PATCH 11/21] style: fix formatting issues Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 5 ++++- .../test_manifest_declarative_source_dynamic_schema.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index f5bcf3a6b..f0f888ce9 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -241,7 +241,10 @@ def discover( self, source_spec: ConnectorSpecification, config: TConfig ) -> Iterable[AirbyteMessage]: self.set_up_secret_filter(config, source_spec.connectionSpecification) - if not hasattr(self.source, "check_config_during_discover") or not self.source.check_config_during_discover: + if ( + not hasattr(self.source, "check_config_during_discover") + or not self.source.check_config_during_discover + ): self.validate_connection(source_spec, config) catalog = self.source.discover(self.logger, config) diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index bcd051a71..d1eeb9224 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -65,7 +65,7 @@ def test_check_config_during_discover_without_dynamic_schema_loader(): } source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_during_discover is False assert source.check_config_against_spec is True From 77772c3ae982bab86e9b085b43bedfd34ae87c8e Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 16:52:31 -0700 Subject: [PATCH 12/21] Update CHANGELOG.md --- CHANGELOG.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d644c24fb..31e73e411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,6 @@ Newer updates can be found here: [GitHub Release Notes](https://github.com/airby # Changelog -## Unreleased - -- Added `check_config_during_discover` flag to declarative sources to skip config validation during discovery for sources with DynamicSchemaLoader - ## 6.5.2 bugfix: Ensure that streams with partition router are not executed concurrently From 6ca213c761b57a71bd5206b6bc30f474be5e2b70 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:53:28 +0000 Subject: [PATCH 13/21] refactor: push check_config_during_discover flag into connector base class Co-Authored-By: Aaron Steers --- airbyte_cdk/entrypoint.py | 5 +---- airbyte_cdk/sources/abstract_source.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index f0f888ce9..1b43f59f2 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -241,10 +241,7 @@ def discover( self, source_spec: ConnectorSpecification, config: TConfig ) -> Iterable[AirbyteMessage]: self.set_up_secret_filter(config, source_spec.connectionSpecification) - if ( - not hasattr(self.source, "check_config_during_discover") - or not self.source.check_config_during_discover - ): + if not self.source.check_config_during_discover: self.validate_connection(source_spec, config) catalog = self.source.discover(self.logger, config) diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index ab9ee48b8..b84f5d457 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -324,3 +324,16 @@ def stop_sync_on_stream_failure(self) -> bool: on the first error seen and emit a single error trace message for that stream. """ return False + + @property + def check_config_during_discover(self) -> bool: + """ + Determines whether config validation should be skipped during discovery. + + By default, config validation is not skipped during discovery. This can be overridden + by sources that can provide catalog information without requiring authentication. + + Returns: + bool: True if config validation should be skipped during discovery, False otherwise. + """ + return False From dce4f8ccf7345addaada2e42a0348243aaa9eec4 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:56:04 +0000 Subject: [PATCH 14/21] style: fix formatting issues Co-Authored-By: Aaron Steers --- airbyte_cdk/sources/abstract_source.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index b84f5d457..0de50286f 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -324,15 +324,15 @@ def stop_sync_on_stream_failure(self) -> bool: on the first error seen and emit a single error trace message for that stream. """ return False - + @property def check_config_during_discover(self) -> bool: """ Determines whether config validation should be skipped during discovery. - + By default, config validation is not skipped during discovery. This can be overridden by sources that can provide catalog information without requiring authentication. - + Returns: bool: True if config validation should be skipped during discovery, False otherwise. """ From 24a0919f58432a66fb991727a40380e81aec92db Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 9 Apr 2025 00:03:34 +0000 Subject: [PATCH 15/21] fix: resolve MyPy type checking issues with check_config_during_discover property Co-Authored-By: Aaron Steers --- airbyte_cdk/sources/abstract_source.py | 14 +++++++++++++- .../declarative/manifest_declarative_source.py | 6 ++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index 0de50286f..ca997c901 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -325,6 +325,8 @@ def stop_sync_on_stream_failure(self) -> bool: """ return False + _check_config_during_discover: bool = False + @property def check_config_during_discover(self) -> bool: """ @@ -336,4 +338,14 @@ def check_config_during_discover(self) -> bool: Returns: bool: True if config validation should be skipped during discovery, False otherwise. """ - return False + return self._check_config_during_discover + + @check_config_during_discover.setter + def check_config_during_discover(self, value: bool) -> None: + """ + Sets whether config validation should be skipped during discovery. + + Args: + value: True if config validation should be skipped during discovery, False otherwise. + """ + self._check_config_during_discover = value diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 8798cd286..f17ef81ee 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -47,6 +47,7 @@ ) from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING from airbyte_cdk.sources.message import MessageRepository +from airbyte_cdk.sources.source import Source from airbyte_cdk.sources.streams.core import Stream from airbyte_cdk.sources.types import ConnectionDefinition from airbyte_cdk.sources.utils.slice_logger import ( @@ -108,9 +109,10 @@ def __init__( self._config = config or {} self._validate_source() - - self.check_config_during_discover = self._uses_dynamic_schema_loader() + self.check_config_against_spec = True + + self.check_config_during_discover = self._uses_dynamic_schema_loader() @property def resolved_manifest(self) -> Mapping[str, Any]: From f920f04ba81d145bb369f73632decc448b6b9d7d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 9 Apr 2025 00:07:26 +0000 Subject: [PATCH 16/21] refactor: move check_config_during_discover to BaseConnector class Co-Authored-By: Aaron Steers --- airbyte_cdk/connector.py | 8 ++++++ airbyte_cdk/sources/abstract_source.py | 25 ------------------- ...ifest_declarative_source_dynamic_schema.py | 5 +++- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 342ecee2d..9f470289b 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -33,6 +33,14 @@ def load_optional_package_file(package: str, filename: str) -> Optional[bytes]: class BaseConnector(ABC, Generic[TConfig]): # configure whether the `check_config_against_spec_or_exit()` needs to be called check_config_against_spec: bool = True + + check_config_during_discover: bool = False + """ + Determines whether config validation should be skipped during discovery. + + By default, config validation is not skipped during discovery. This can be overridden + by sources that can provide catalog information without requiring authentication. + """ @abstractmethod def configure(self, config: Mapping[str, Any], temp_dir: str) -> TConfig: diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index ca997c901..ab9ee48b8 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -324,28 +324,3 @@ def stop_sync_on_stream_failure(self) -> bool: on the first error seen and emit a single error trace message for that stream. """ return False - - _check_config_during_discover: bool = False - - @property - def check_config_during_discover(self) -> bool: - """ - Determines whether config validation should be skipped during discovery. - - By default, config validation is not skipped during discovery. This can be overridden - by sources that can provide catalog information without requiring authentication. - - Returns: - bool: True if config validation should be skipped during discovery, False otherwise. - """ - return self._check_config_during_discover - - @check_config_during_discover.setter - def check_config_during_discover(self, value: bool) -> None: - """ - Sets whether config validation should be skipped during discovery. - - Args: - value: True if config validation should be skipped during discovery, False otherwise. - """ - self._check_config_during_discover = value diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index d1eeb9224..81364a5e0 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -1,6 +1,9 @@ # +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +"""Tests for the ManifestDeclarativeSource with DynamicSchemaLoader.""" + from unittest.mock import MagicMock, patch import pytest @@ -65,7 +68,7 @@ def test_check_config_during_discover_without_dynamic_schema_loader(): } source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_during_discover is False assert source.check_config_against_spec is True From f01525f0b9d8fd1abfb2fc828dd135daf7fcc45a Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 17:09:31 -0700 Subject: [PATCH 17/21] Update airbyte_cdk/connector.py --- airbyte_cdk/connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 9f470289b..e04d5b111 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -31,8 +31,8 @@ def load_optional_package_file(package: str, filename: str) -> Optional[bytes]: class BaseConnector(ABC, Generic[TConfig]): - # configure whether the `check_config_against_spec_or_exit()` needs to be called check_config_against_spec: bool = True + """Configure whether `check_config_against_spec_or_exit()` needs to be called.""" check_config_during_discover: bool = False """ From 769d3613052ba77894fc5a62af29bbe1a1ebb93b Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 17:09:54 -0700 Subject: [PATCH 18/21] Update airbyte_cdk/connector.py --- airbyte_cdk/connector.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index e04d5b111..47ec5cf8b 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -35,8 +35,7 @@ class BaseConnector(ABC, Generic[TConfig]): """Configure whether `check_config_against_spec_or_exit()` needs to be called.""" check_config_during_discover: bool = False - """ - Determines whether config validation should be skipped during discovery. + """Determines whether config validation should be skipped during discovery. By default, config validation is not skipped during discovery. This can be overridden by sources that can provide catalog information without requiring authentication. From c3cbad84a6973fa97cdafde7be96f8635b951540 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 8 Apr 2025 17:11:15 -0700 Subject: [PATCH 19/21] Update airbyte_cdk/sources/declarative/manifest_declarative_source.py --- airbyte_cdk/sources/declarative/manifest_declarative_source.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index f17ef81ee..cbcef613a 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -110,8 +110,6 @@ def __init__( self._config = config or {} self._validate_source() - self.check_config_against_spec = True - self.check_config_during_discover = self._uses_dynamic_schema_loader() @property From 3cb8faf91a661341070d0c0b0324bd846ea24a50 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Wed, 9 Apr 2025 01:41:59 +0000 Subject: [PATCH 20/21] Auto-fix lint and format issues --- airbyte_cdk/connector.py | 2 +- airbyte_cdk/sources/declarative/manifest_declarative_source.py | 2 +- .../test_manifest_declarative_source_dynamic_schema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 47ec5cf8b..46f02f161 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -33,7 +33,7 @@ def load_optional_package_file(package: str, filename: str) -> Optional[bytes]: class BaseConnector(ABC, Generic[TConfig]): check_config_against_spec: bool = True """Configure whether `check_config_against_spec_or_exit()` needs to be called.""" - + check_config_during_discover: bool = False """Determines whether config validation should be skipped during discovery. diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index cbcef613a..3ff6a824c 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -109,7 +109,7 @@ def __init__( self._config = config or {} self._validate_source() - + self.check_config_during_discover = self._uses_dynamic_schema_loader() @property diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py index 81364a5e0..db91132f0 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source_dynamic_schema.py @@ -68,7 +68,7 @@ def test_check_config_during_discover_without_dynamic_schema_loader(): } source = ManifestDeclarativeSource(source_config=source_config) - + assert source.check_config_during_discover is False assert source.check_config_against_spec is True From 08397ad88a309ab22dbc94ad42148781e58a7661 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 9 Apr 2025 09:11:31 -0700 Subject: [PATCH 21/21] fix condition direction --- airbyte_cdk/entrypoint.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 1b43f59f2..4c737007b 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -149,8 +149,7 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]: elif ( cmd == "discover" and not parsed_args.config - and hasattr(self.source, "check_config_during_discover") - and self.source.check_config_during_discover + and not self.source.check_config_during_discover ): # Connector supports unprivileged discover empty_config: dict[str, Any] = {}