Skip to content

Commit 61e9648

Browse files
refactor: use URL-based schema loading instead of vendored YAML files
Replace vendored YAML schema files with URL-based schema loading from the monorepo. This removes the dependency on vendoring schemas and allows the CDK to fetch the latest schema from a canonical source.

Changes:
- Remove vendored YAML files from airbyte_cdk/models/connector_metadata/resources/
- Remove bin/generate_connector_metadata_files.py schema generation script
- Add get_metadata_schema() utility function to fetch schema from URL or file path
- Refactor validate_metadata_file() to use JSON schema validation via jsonschema library
- Add --schema flag to CLI for custom schema path/URL (defaults to monorepo URL)
- Add comprehensive unit tests with local schema (no network dependency)
- Add TODO comment to update URL to master branch after associated PR merges

The default schema URL points to a specific commit in the monorepo:
https://raw.githubusercontent.com/airbytehq/airbyte/61048d88732df93c50bd3da490de8d3cc1aa66b0/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorMetadataDefinitionV0.json

This will be updated to use the master branch URL after the associated PR merges.

Co-Authored-By: AJ Steers <[email protected]>
1 parent f58eec1 commit 61e9648

36 files changed

+254
-1122
lines changed

airbyte_cdk/cli/airbyte_cdk/_metadata.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,22 @@ def metadata_cli_group() -> None:
2525
required=True,
2626
help="Path to the metadata.yaml file to validate",
2727
)
28+
@click.option(
29+
"--schema",
30+
"-s",
31+
"schema_source",
32+
type=str,
33+
default=None,
34+
help="URL or file path to JSON schema (defaults to monorepo schema)",
35+
)
2836
@click.option(
2937
"--format",
3038
"output_format",
3139
type=click.Choice(["json", "text"]),
3240
default="text",
3341
help="Output format (json or text)",
3442
)
35-
def validate_command(file_path: Path, output_format: str) -> None:
43+
def validate_command(file_path: Path, schema_source: str | None, output_format: str) -> None:
3644
"""Validate a connector metadata.yaml file.
3745
3846
This command validates a metadata.yaml file against the connector metadata schema
@@ -41,8 +49,9 @@ def validate_command(file_path: Path, output_format: str) -> None:
4149
Examples:
4250
airbyte-cdk metadata validate --file metadata.yaml
4351
airbyte-cdk metadata validate --file metadata.yaml --format json
52+
airbyte-cdk metadata validate --file metadata.yaml --schema /path/to/schema.json
4453
"""
45-
result = validate_metadata_file(file_path)
54+
result = validate_metadata_file(file_path, schema_source)
4655

4756
if output_format == "json":
4857
click.echo(result.model_dump_json(indent=2))

airbyte_cdk/models/connector_metadata/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
MetadataFile,
88
SuggestedStreams,
99
ValidationResult,
10+
get_metadata_schema,
1011
validate_metadata_file,
1112
)
1213

@@ -17,5 +18,6 @@
1718
"MetadataFile",
1819
"SuggestedStreams",
1920
"ValidationResult",
21+
"get_metadata_schema",
2022
"validate_metadata_file",
2123
]

airbyte_cdk/models/connector_metadata/metadata_file.py

Lines changed: 62 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,16 @@
66
from enum import Enum
77
from pathlib import Path
88
from typing import Any
9+
from urllib.request import urlopen
910

11+
import jsonschema
1012
import yaml
1113
from pydantic import BaseModel, Field, ValidationError
1214

15+
# TODO: Update to master branch URL after associated PR merges
16+
# https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorMetadataDefinitionV0.json
17+
DEFAULT_SCHEMA_URL = "https://raw.githubusercontent.com/airbytehq/airbyte/61048d88732df93c50bd3da490de8d3cc1aa66b0/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorMetadataDefinitionV0.json"
18+
1319

1420
class ConnectorLanguage(str, Enum):
1521
"""Connector implementation language."""
@@ -123,11 +129,41 @@ class ValidationResult(BaseModel):
123129
metadata: dict[str, Any] | None = Field(None, description="Parsed metadata if available")
124130

125131

126-
def validate_metadata_file(file_path: Path) -> ValidationResult:
127-
"""Validate a metadata.yaml file.
132+
def get_metadata_schema(schema_source: str | Path | None = None) -> dict[str, Any]:
133+
"""Load metadata JSON schema from URL or file path.
134+
135+
Args:
136+
schema_source: URL or file path to JSON schema. If None, uses DEFAULT_SCHEMA_URL.
137+
138+
Returns:
139+
Parsed JSON schema as dictionary
140+
"""
141+
if schema_source is None:
142+
schema_source = DEFAULT_SCHEMA_URL
143+
144+
if isinstance(schema_source, Path) or (
145+
isinstance(schema_source, str) and not schema_source.startswith(("http://", "https://"))
146+
):
147+
schema_path = Path(schema_source)
148+
if not schema_path.exists():
149+
raise FileNotFoundError(f"Schema file not found: {schema_path}")
150+
return json.loads(schema_path.read_text())
151+
152+
try:
153+
with urlopen(schema_source, timeout=10) as response:
154+
return json.loads(response.read().decode("utf-8"))
155+
except Exception as e:
156+
raise RuntimeError(f"Failed to fetch schema from {schema_source}: {e}") from e
157+
158+
159+
def validate_metadata_file(
160+
file_path: Path, schema_source: str | Path | None = None
161+
) -> ValidationResult:
162+
"""Validate a metadata.yaml file against JSON schema.
128163
129164
Args:
130165
file_path: Path to the metadata.yaml file to validate
166+
schema_source: URL or file path to JSON schema. If None, uses DEFAULT_SCHEMA_URL.
131167
132168
Returns:
133169
ValidationResult with validation status, errors, and parsed metadata
@@ -169,27 +205,40 @@ def validate_metadata_file(file_path: Path) -> ValidationResult:
169205
)
170206

171207
try:
172-
metadata_file = MetadataFile.model_validate(metadata_dict)
208+
schema = get_metadata_schema(schema_source)
209+
except Exception as e:
210+
return ValidationResult(
211+
valid=False,
212+
errors=[{"type": "schema_load_error", "message": f"Failed to load schema: {e}"}],
213+
metadata=metadata_dict,
214+
)
215+
216+
try:
217+
jsonschema.validate(instance=metadata_dict, schema=schema)
173218
return ValidationResult(
174219
valid=True,
175220
errors=[],
176221
metadata=metadata_dict,
177222
)
178-
except ValidationError as e:
179-
for error in e.errors():
180-
errors.append(
181-
{
182-
"type": error["type"],
183-
"path": ".".join(str(loc) for loc in error["loc"]),
184-
"message": error["msg"],
185-
}
186-
)
187-
223+
except jsonschema.ValidationError as e:
224+
errors.append(
225+
{
226+
"type": "validation_error",
227+
"path": ".".join(str(p) for p in e.absolute_path) if e.absolute_path else "",
228+
"message": e.message,
229+
}
230+
)
188231
return ValidationResult(
189232
valid=False,
190233
errors=errors,
191234
metadata=metadata_dict,
192235
)
236+
except jsonschema.SchemaError as e:
237+
return ValidationResult(
238+
valid=False,
239+
errors=[{"type": "schema_error", "message": f"Invalid schema: {e.message}"}],
240+
metadata=metadata_dict,
241+
)
193242

194243
except Exception as e:
195244
return ValidationResult(

airbyte_cdk/models/connector_metadata/resources/ActorDefinitionResourceRequirements.yaml

Lines changed: 0 additions & 30 deletions
This file was deleted.

airbyte_cdk/models/connector_metadata/resources/AirbyteInternal.yaml

Lines changed: 0 additions & 32 deletions
This file was deleted.

airbyte_cdk/models/connector_metadata/resources/AllowedHosts.yaml

Lines changed: 0 additions & 13 deletions
This file was deleted.

airbyte_cdk/models/connector_metadata/resources/ConnectorBreakingChanges.yaml

Lines changed: 0 additions & 65 deletions
This file was deleted.

airbyte_cdk/models/connector_metadata/resources/ConnectorBuildOptions.yaml

Lines changed: 0 additions & 10 deletions
This file was deleted.

airbyte_cdk/models/connector_metadata/resources/ConnectorIPCOptions.yaml

Lines changed: 0 additions & 29 deletions
This file was deleted.

0 commit comments

Comments
 (0)