Skip to content

Commit 3878e23

Browse files
feat(metadata): vendor YAML schemas and add CLI validation command
- Vendor metadata schema YAML files from monorepo into CDK resources directory
- Include fixes for SecretStore required fields and ConnectorBreakingChanges const usage
- Add validate_metadata_file() function with Pydantic validation
- Add 'airbyte-cdk metadata validate' CLI command with JSON and text output
- Update generation script to use vendored YAML files instead of cloning from GitHub
- Regenerate metadata_schema.json with fixed YAML files
- Reorganize connector_metadata module into package structure

This makes the CDK self-contained for metadata validation and removes dependency on the monorepo for schema files.

Co-Authored-By: AJ Steers <[email protected]>
1 parent 3c2a4f8 commit 3878e23

37 files changed

+1949
-881
lines changed

airbyte_cdk/cli/airbyte_cdk/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from airbyte_cdk.cli.airbyte_cdk._connector import connector_cli_group
4545
from airbyte_cdk.cli.airbyte_cdk._image import image_cli_group
4646
from airbyte_cdk.cli.airbyte_cdk._manifest import manifest_cli_group
47+
from airbyte_cdk.cli.airbyte_cdk._metadata import metadata_cli_group
4748
from airbyte_cdk.cli.airbyte_cdk._secrets import secrets_cli_group
4849
from airbyte_cdk.cli.airbyte_cdk._version import print_version
4950

@@ -78,6 +79,7 @@ def cli(
7879

7980
cli.add_command(connector_cli_group)
8081
cli.add_command(manifest_cli_group)
82+
cli.add_command(metadata_cli_group)
8183
cli.add_command(image_cli_group)
8284
cli.add_command(secrets_cli_group)
8385

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2+
"""CLI commands for metadata validation."""
3+
4+
import json
5+
import sys
6+
from pathlib import Path
7+
8+
import rich_click as click
9+
10+
from airbyte_cdk.models.connector_metadata import validate_metadata_file
11+
12+
13+
@click.group(name="metadata")
def metadata_cli_group() -> None:
    """Commands for working with connector metadata."""
    # Intentionally empty: this function only anchors the `metadata` command
    # group; subcommands attach themselves via `@metadata_cli_group.command`.
17+
18+
19+
@metadata_cli_group.command(name="validate")
@click.option(
    "--file",
    "-f",
    "file_path",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to the metadata.yaml file to validate",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "text"]),
    default="text",
    help="Output format (json or text)",
)
def validate_command(file_path: Path, output_format: str) -> None:
    """Validate a connector metadata.yaml file.

    This command validates a metadata.yaml file against the connector metadata schema
    and reports any validation errors.

    Examples:
        airbyte-cdk metadata validate --file metadata.yaml
        airbyte-cdk metadata validate --file metadata.yaml --format json
    """
    # NOTE: the docstring above doubles as the command's --help text.
    result = validate_metadata_file(file_path)

    # The process exit status mirrors the validation outcome in every output mode.
    exit_code = 0 if result.valid else 1

    # Machine-readable mode: dump the whole result object and stop.
    if output_format == "json":
        click.echo(result.model_dump_json(indent=2))
        sys.exit(exit_code)

    # Human-readable mode, success case.
    if result.valid:
        click.secho("✓ Metadata file is valid", fg="green")
        sys.exit(exit_code)

    # Human-readable mode, failure case: headline followed by one line per error.
    click.secho("✗ Metadata file is invalid", fg="red")
    click.echo()
    click.echo("Errors:")
    for entry in result.errors:
        kind = entry.get("type", "unknown")
        location = entry.get("path", "")
        text = entry.get("message", "")

        # Only prefix the message with a location when the error carries one.
        prefix = f"{location}: " if location else ""
        click.echo(f" • {prefix}{text} (type: {kind})")

    sys.exit(exit_code)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Connector metadata models and validation."""
2+
3+
from airbyte_cdk.models.connector_metadata.metadata_file import (
4+
ConnectorBuildOptions,
5+
ConnectorLanguage,
6+
ConnectorMetadata,
7+
MetadataFile,
8+
SuggestedStreams,
9+
ValidationResult,
10+
validate_metadata_file,
11+
)
12+
13+
__all__ = [
14+
"ConnectorBuildOptions",
15+
"ConnectorLanguage",
16+
"ConnectorMetadata",
17+
"MetadataFile",
18+
"SuggestedStreams",
19+
"ValidationResult",
20+
"validate_metadata_file",
21+
]

airbyte_cdk/models/connector_metadata.py renamed to airbyte_cdk/models/connector_metadata/metadata_file.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
from __future__ import annotations
44

5+
import json
56
from enum import Enum
67
from pathlib import Path
8+
from typing import Any
79

810
import yaml
9-
from pydantic import BaseModel, Field
11+
from pydantic import BaseModel, Field, ValidationError
1012

1113

1214
class ConnectorLanguage(str, Enum):
@@ -109,3 +111,89 @@ def from_file(
109111

110112
metadata_file = MetadataFile.model_validate(metadata_dict)
111113
return metadata_file
114+
115+
116+
class ValidationResult(BaseModel):
    """Result of metadata validation."""

    # Required flag: True only when the file passed validation.
    valid: bool = Field(description="Whether the metadata is valid")

    # One dict per problem found; a fresh empty list is created when omitted.
    errors: list[dict[str, Any]] = Field(
        description="List of validation errors",
        default_factory=list,
    )

    # The parsed YAML mapping, or None when nothing usable could be parsed.
    metadata: dict[str, Any] | None = Field(
        default=None,
        description="Parsed metadata if available",
    )
124+
125+
126+
def _as_metadata_mapping(value: Any) -> dict[str, Any] | None:
    """Return `value` if it fits `ValidationResult.metadata`, otherwise None.

    `ValidationResult.metadata` is declared as `dict[str, Any] | None`, so
    anything that is not a mapping with string keys is replaced by None rather
    than being handed to the result model.
    """
    if isinstance(value, dict) and all(isinstance(key, str) for key in value):
        return value
    return None


def validate_metadata_file(file_path: Path) -> ValidationResult:
    """Validate a metadata.yaml file.

    The file is read as UTF-8, parsed as YAML, checked for a top-level
    ``data`` key and then validated against the ``MetadataFile`` model.
    Problems are reported through the returned ``ValidationResult``.

    Args:
        file_path: Path to the metadata.yaml file to validate

    Returns:
        ValidationResult with validation status, errors, and parsed metadata.
        ``metadata`` is populated only when the parsed document is a mapping
        with string keys; otherwise it is None.
    """
    parsed: Any = None

    try:
        if not file_path.exists():
            return ValidationResult(
                valid=False,
                errors=[
                    {"type": "file_not_found", "message": f"Metadata file not found: {file_path}"}
                ],
                metadata=None,
            )

        try:
            # Decode explicitly as UTF-8 instead of relying on the platform default.
            parsed = yaml.safe_load(file_path.read_text(encoding="utf-8"))
        except yaml.YAMLError as e:
            return ValidationResult(
                valid=False,
                errors=[{"type": "yaml_parse_error", "message": f"Failed to parse YAML: {e}"}],
                metadata=None,
            )

        # A list or scalar document can never be metadata. Reject it up front:
        # `"data" in parsed` would otherwise do a substring/element test, and the
        # value could not be stored in the result model.
        if parsed is not None and not isinstance(parsed, dict):
            return ValidationResult(
                valid=False,
                errors=[
                    {
                        "type": "invalid_type",
                        "message": (
                            "Metadata must be a YAML mapping, "
                            f"got {type(parsed).__name__}"
                        ),
                    }
                ],
                metadata=None,
            )

        # Covers an empty file (None), an empty mapping, and a mapping without `data`.
        if not parsed or "data" not in parsed:
            return ValidationResult(
                valid=False,
                errors=[
                    {
                        "type": "missing_field",
                        "path": "data",
                        "message": "Missing 'data' field in metadata",
                    }
                ],
                metadata=_as_metadata_mapping(parsed),
            )

        try:
            # Only the success or failure of model validation matters here.
            MetadataFile.model_validate(parsed)
        except ValidationError as e:
            errors: list[dict[str, Any]] = [
                {
                    "type": error["type"],
                    "path": ".".join(str(loc) for loc in error["loc"]),
                    "message": error["msg"],
                }
                for error in e.errors()
            ]
            return ValidationResult(
                valid=False,
                errors=errors,
                metadata=_as_metadata_mapping(parsed),
            )

        return ValidationResult(
            valid=True,
            errors=[],
            metadata=_as_metadata_mapping(parsed),
        )

    except Exception as e:
        # Top-level boundary: report anything unforeseen (e.g. I/O or decoding
        # errors) as a result. The metadata is sanitised so that building this
        # fallback result cannot itself fail model validation.
        return ValidationResult(
            valid=False,
            errors=[{"type": "unexpected_error", "message": f"Unexpected error: {e}"}],
            metadata=_as_metadata_mapping(parsed),
        )
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ActorDefinitionResourceRequirements.yaml
4+
title: ActorDefinitionResourceRequirements
5+
description: actor definition specific resource requirements
6+
type: object
7+
# set to false because we need the validations on seeds to be strict. otherwise, we will just add whatever is in the seed file into the db.
8+
additionalProperties: false
9+
properties:
10+
default:
11+
description: if set, these are the requirements that should be set for ALL jobs run for this actor definition.
12+
"$ref": ResourceRequirements.yaml
13+
jobSpecific:
14+
type: array
15+
items:
16+
"$ref": "#/definitions/JobTypeResourceLimit"
17+
definitions:
18+
JobTypeResourceLimit:
19+
description: sets resource requirements for a specific job type for an actor definition. these values override the default, if both are set.
20+
type: object
21+
# set to false because we need the validations on seeds to be strict. otherwise, we will just add whatever is in the seed file into the db.
22+
additionalProperties: false
23+
required:
24+
- jobType
25+
- resourceRequirements
26+
properties:
27+
jobType:
28+
"$ref": JobType.yaml
29+
resourceRequirements:
30+
"$ref": ResourceRequirements.yaml
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors_ci/metadata_service/lib/models/src/AirbyteInternal.yml
4+
title: AirbyteInternal
5+
description: Fields for internal use only
6+
type: object
7+
additionalProperties: true
8+
properties:
9+
sl:
10+
type: integer
11+
enum:
12+
- 0
13+
- 100
14+
- 200
15+
- 300
16+
ql:
17+
type: integer
18+
enum:
19+
- 0
20+
- 100
21+
- 200
22+
- 300
23+
- 400
24+
- 500
25+
- 600
26+
isEnterprise:
27+
type: boolean
28+
default: false
29+
requireVersionIncrementsInPullRequests:
30+
type: boolean
31+
default: true
32+
description: When false, version increment checks will be skipped for this connector
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/AllowedHosts.yaml
4+
title: AllowedHosts
5+
description: A connector's allowed hosts. If present, the platform will limit communication to only hosts which are listed in `AllowedHosts.hosts`.
6+
type: object
7+
additionalProperties: true
8+
properties:
9+
hosts:
10+
type: array
11+
description: An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. An empty list here means that no network access is granted.
12+
items:
13+
type: string
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ConnectorBreakingChanges.yaml
4+
title: ConnectorBreakingChanges
5+
description: Each entry denotes a breaking change in a specific version of a connector that requires user action to upgrade.
6+
type: object
7+
additionalProperties: false
8+
minProperties: 1
9+
patternProperties:
10+
"^\\d+\\.\\d+\\.\\d+$":
11+
$ref: "#/definitions/VersionBreakingChange"
12+
definitions:
13+
VersionBreakingChange:
14+
description: Contains information about a breaking change, including the deadline to upgrade and a message detailing the change.
15+
type: object
16+
additionalProperties: false
17+
required:
18+
- upgradeDeadline
19+
- message
20+
properties:
21+
upgradeDeadline:
22+
description: The deadline by which to upgrade before the breaking change takes effect.
23+
type: string
24+
format: date
25+
message:
26+
description: Descriptive message detailing the breaking change.
27+
type: string
28+
deadlineAction:
29+
description: Action to do when the deadline is reached.
30+
type: string
31+
enum:
32+
- auto_upgrade
33+
- disable
34+
migrationDocumentationUrl:
35+
description: URL to documentation on how to migrate to the current version. Defaults to ${documentationUrl}-migrations#${version}
36+
type: string
37+
format: uri
38+
scopedImpact:
39+
description: List of scopes that are impacted by the breaking change. If not specified, the breaking change cannot be scoped to reduce impact via the supported scope types.
40+
type: array
41+
minItems: 1
42+
items:
43+
$ref: "#/definitions/BreakingChangeScope"
44+
BreakingChangeScope:
45+
description: A scope that can be used to limit the impact of a breaking change.
46+
type: object
47+
oneOf:
48+
- $ref: "#/definitions/StreamBreakingChangeScope"
49+
StreamBreakingChangeScope:
50+
description: A scope that can be used to limit the impact of a breaking change to specific streams.
51+
type: object
52+
additionalProperties: false
53+
required:
54+
- scopeType
55+
- impactedScopes
56+
properties:
57+
scopeType:
58+
type: string
59+
const: stream
60+
impactedScopes:
61+
description: List of streams that are impacted by the breaking change.
62+
type: array
63+
minItems: 1
64+
items:
65+
type: string
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ConnectorBuildOptions.yaml
4+
title: ConnectorBuildOptions
5+
description: metadata specific to the build process.
6+
type: object
7+
additionalProperties: false
8+
properties:
9+
baseImage:
10+
type: string
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ConnectorIPCOptions.yaml
4+
title: ConnectorIPCOptions
5+
type: object
6+
required:
7+
- dataChannel
8+
additionalProperties: false
9+
properties:
10+
dataChannel:
11+
type: object
12+
required:
13+
- version
14+
- supportedSerialization
15+
- supportedTransport
16+
additionalProperties: false
17+
properties:
18+
version:
19+
type: string
20+
supportedSerialization:
21+
type: array
22+
items:
23+
type: string
24+
enum: ["JSONL", "PROTOBUF", "FLATBUFFERS"]
25+
supportedTransport:
26+
type: array
27+
items:
28+
type: string
29+
enum: ["STDIO", "SOCKET"]

0 commit comments

Comments
 (0)