1212"""
1313
1414import json
15+ import re
1516import subprocess
1617import sys
1718import tempfile
@@ -72,6 +73,7 @@ def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path
7273 schemas [schema_name ] = schema_content
7374
7475 all_schema_names = set (schemas .keys ())
76+ json_primitives = {"string" , "number" , "integer" , "boolean" , "object" , "array" , "null" }
7577
7678 for schema_content in schemas .values ():
7779 if isinstance (schema_content , dict ) and "definitions" in schema_content :
@@ -85,13 +87,18 @@ def fix_refs(obj, in_definition=False):
8587 if (key == "$id" or key == "$schema" ) and in_definition :
8688 continue
8789 elif key == "$ref" and isinstance (value , str ):
88- if value .endswith (".yaml" ):
89- schema_name = value .replace (".yaml" , "" )
90- new_obj [key ] = f"#/definitions/{ schema_name } "
90+ m = re .match (r"(?:.*/)?(?P<name>[^/#]+)\.yaml(?P<frag>#.*)?$" , value )
91+ if m :
92+ schema_name = m .group ("name" )
93+ frag = m .group ("frag" ) or ""
94+ new_obj [key ] = f"#/definitions/{ schema_name } { frag } "
95+ else :
96+ new_obj [key ] = value
97+ elif key == "type" and isinstance (value , str ):
98+ if value in all_schema_names and value not in json_primitives :
99+ new_obj ["$ref" ] = f"#/definitions/{ value } "
91100 else :
92101 new_obj [key ] = value
93- elif key == "type" and isinstance (value , str ) and value in all_schema_names :
94- new_obj ["$ref" ] = f"#/definitions/{ value } "
95102 elif key == "type" and value == "const" :
96103 pass
97104 else :
@@ -106,26 +113,26 @@ def fix_refs(obj, in_definition=False):
106113 main_schema = schemas .get ("ConnectorMetadataDefinitionV0" )
107114
108115 if main_schema :
109- # Create a consolidated schema with definitions
110- consolidated = {
111- "$schema" : main_schema .get ("$schema" , "http://json-schema.org/draft-07/schema#" ),
112- "title" : "Connector Metadata Schema" ,
113- "description" : "Consolidated JSON schema for Airbyte connector metadata validation" ,
114- ** main_schema ,
115- "definitions" : {},
116- }
116+ # Create a consolidated schema preserving main schema structure
117+ consolidated = dict (main_schema ) # shallow copy
118+ consolidated .setdefault ("$schema" , "http://json-schema.org/draft-07/schema#" )
119+ consolidated .setdefault ("title" , "Connector Metadata Schema" )
120+ consolidated .setdefault ("description" , "Consolidated JSON schema for Airbyte connector metadata validation" )
121+
122+ consolidated_definitions = dict (consolidated .get ("definitions" , {}))
117123
118124 # Add all schemas (including their internal definitions) as top-level definitions
119125 for schema_name , schema_content in schemas .items ():
120126 if schema_name != "ConnectorMetadataDefinitionV0" :
121127 if isinstance (schema_content , dict ) and "definitions" in schema_content :
122128 for def_name , def_content in schema_content ["definitions" ].items ():
123- consolidated [ "definitions" ] [def_name ] = fix_refs (def_content , in_definition = True )
129+ consolidated_definitions [def_name ] = fix_refs (def_content , in_definition = True )
124130 schema_without_defs = {k : v for k , v in schema_content .items () if k != "definitions" }
125- consolidated [ "definitions" ] [schema_name ] = fix_refs (schema_without_defs , in_definition = True )
131+ consolidated_definitions [schema_name ] = fix_refs (schema_without_defs , in_definition = True )
126132 else :
127- consolidated [ "definitions" ] [schema_name ] = fix_refs (schema_content , in_definition = True )
133+ consolidated_definitions [schema_name ] = fix_refs (schema_content , in_definition = True )
128134
135+ consolidated ["definitions" ] = consolidated_definitions
129136 consolidated = fix_refs (consolidated , in_definition = False )
130137
131138 output_json_path .write_text (json .dumps (consolidated , indent = 2 ))
0 commit comments