Skip to content

Commit ba912fe

Browse files
Fix schema consolidation per CodeRabbit feedback
- Preserve main schema's internal definitions (don't overwrite them)
- Only convert custom types to $ref, not JSON primitives (string, object, etc.)
- Support YAML refs with fragments (e.g., Foo.yaml#/Bar)
- Use regex to properly parse .yaml references with optional fragments

Addresses CodeRabbit's critical feedback on bin/generate_connector_metadata_files.py

Co-Authored-By: AJ Steers <[email protected]>
1 parent c686574 commit ba912fe

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

airbyte_cdk/test/models/connector_metadata/generated/metadata_schema.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "https://github.com/airbytehq/airbyte/airbyte-ci/connectors_ci/metadata_service/lib/models/src/ConnectorMetadataDefinitionV0.yml",
34
"title": "ConnectorMetadataDefinitionV0",
45
"description": "describes the metadata of a connector",
5-
"$id": "https://github.com/airbytehq/airbyte/airbyte-ci/connectors_ci/metadata_service/lib/models/src/ConnectorMetadataDefinitionV0.yml",
66
"type": "object",
77
"required": [
88
"metadataSpecVersion",
@@ -1076,6 +1076,7 @@
10761076
],
10771077
"properties": {
10781078
"scopeType": {
1079+
"type": "const",
10791080
"const": "stream"
10801081
},
10811082
"impactedScopes": {

airbyte_cdk/test/models/connector_metadata/generated/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ class StreamBreakingChangeScope(BaseModel):
289289
class Config:
290290
extra = Extra.forbid
291291

292-
scopeType: str = Field("stream", const=True)
292+
scopeType: Any = Field("stream", const=True)
293293
impactedScopes: List[str] = Field(
294294
...,
295295
description="List of streams that are impacted by the breaking change.",

bin/generate_connector_metadata_files.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"""
1313

1414
import json
15+
import re
1516
import subprocess
1617
import sys
1718
import tempfile
@@ -72,6 +73,7 @@ def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path
7273
schemas[schema_name] = schema_content
7374

7475
all_schema_names = set(schemas.keys())
76+
json_primitives = {"string", "number", "integer", "boolean", "object", "array", "null"}
7577

7678
for schema_content in schemas.values():
7779
if isinstance(schema_content, dict) and "definitions" in schema_content:
@@ -85,13 +87,18 @@ def fix_refs(obj, in_definition=False):
8587
if (key == "$id" or key == "$schema") and in_definition:
8688
continue
8789
elif key == "$ref" and isinstance(value, str):
88-
if value.endswith(".yaml"):
89-
schema_name = value.replace(".yaml", "")
90-
new_obj[key] = f"#/definitions/{schema_name}"
90+
m = re.match(r"(?:.*/)?(?P<name>[^/#]+)\.yaml(?P<frag>#.*)?$", value)
91+
if m:
92+
schema_name = m.group("name")
93+
frag = m.group("frag") or ""
94+
new_obj[key] = f"#/definitions/{schema_name}{frag}"
95+
else:
96+
new_obj[key] = value
97+
elif key == "type" and isinstance(value, str):
98+
if value in all_schema_names and value not in json_primitives:
99+
new_obj["$ref"] = f"#/definitions/{value}"
91100
else:
92101
new_obj[key] = value
93-
elif key == "type" and isinstance(value, str) and value in all_schema_names:
94-
new_obj["$ref"] = f"#/definitions/{value}"
95102
elif key == "type" and value == "const":
96103
pass
97104
else:
@@ -106,26 +113,26 @@ def fix_refs(obj, in_definition=False):
106113
main_schema = schemas.get("ConnectorMetadataDefinitionV0")
107114

108115
if main_schema:
109-
# Create a consolidated schema with definitions
110-
consolidated = {
111-
"$schema": main_schema.get("$schema", "http://json-schema.org/draft-07/schema#"),
112-
"title": "Connector Metadata Schema",
113-
"description": "Consolidated JSON schema for Airbyte connector metadata validation",
114-
**main_schema,
115-
"definitions": {},
116-
}
116+
# Create a consolidated schema preserving main schema structure
117+
consolidated = dict(main_schema) # shallow copy
118+
consolidated.setdefault("$schema", "http://json-schema.org/draft-07/schema#")
119+
consolidated.setdefault("title", "Connector Metadata Schema")
120+
consolidated.setdefault("description", "Consolidated JSON schema for Airbyte connector metadata validation")
121+
122+
consolidated_definitions = dict(consolidated.get("definitions", {}))
117123

118124
# Add all schemas (including their internal definitions) as top-level definitions
119125
for schema_name, schema_content in schemas.items():
120126
if schema_name != "ConnectorMetadataDefinitionV0":
121127
if isinstance(schema_content, dict) and "definitions" in schema_content:
122128
for def_name, def_content in schema_content["definitions"].items():
123-
consolidated["definitions"][def_name] = fix_refs(def_content, in_definition=True)
129+
consolidated_definitions[def_name] = fix_refs(def_content, in_definition=True)
124130
schema_without_defs = {k: v for k, v in schema_content.items() if k != "definitions"}
125-
consolidated["definitions"][schema_name] = fix_refs(schema_without_defs, in_definition=True)
131+
consolidated_definitions[schema_name] = fix_refs(schema_without_defs, in_definition=True)
126132
else:
127-
consolidated["definitions"][schema_name] = fix_refs(schema_content, in_definition=True)
133+
consolidated_definitions[schema_name] = fix_refs(schema_content, in_definition=True)
128134

135+
consolidated["definitions"] = consolidated_definitions
129136
consolidated = fix_refs(consolidated, in_definition=False)
130137

131138
output_json_path.write_text(json.dumps(consolidated, indent=2))

0 commit comments

Comments
 (0)