 import tempfile
 from pathlib import Path

-import anyio
-import dagger
-import yaml
+try:
+    import yaml
+except ImportError:
+    print("Error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr)
+    sys.exit(1)

-PYTHON_IMAGE = "python:3.10"
 OUTPUT_DIR_PATH = "airbyte_cdk/test/models/connector_metadata/generated"
 AIRBYTE_REPO_URL = "https://github.com/airbytehq/airbyte.git"
 SCHEMA_PATH = "airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src"
-
-PIP_DEPENDENCIES = [
-    "datamodel_code_generator==0.26.3",
-]
+DATAMODEL_CODEGEN_VERSION = "0.26.3"


 def clone_schemas_from_github(temp_dir: Path) -> Path:
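The dependency pin moves from a container-side pip install of PIP_DEPENDENCIES to a uvx --from spec built around DATAMODEL_CODEGEN_VERSION. A minimal sketch of how the constant maps onto the uvx command line (the build_uvx_cmd helper and the example paths are illustrative, not part of the script):

    DATAMODEL_CODEGEN_VERSION = "0.26.3"

    def build_uvx_cmd(*codegen_args: str) -> list[str]:
        # Argv that runs the pinned datamodel-code-generator release through uvx,
        # mirroring the subprocess call added further down in this diff.
        return [
            "uvx",
            "--from",
            f"datamodel-code-generator=={DATAMODEL_CODEGEN_VERSION}",
            "datamodel-codegen",
            *codegen_args,
        ]

    # Example (hypothetical paths):
    # subprocess.run(build_uvx_cmd("--input", "schemas/", "--output", "out/"), check=True)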
@@ -64,29 +62,27 @@ def clone_schemas_from_github(temp_dir: Path) -> Path:
     return schemas_dir


-async def generate_models_single_file(
-    dagger_client: dagger.Client,
-    yaml_dir_path: str,
-    output_file_path: str,
+def generate_models_single_file(
+    yaml_dir_path: Path,
+    output_file_path: Path,
+    temp_dir: Path,
 ) -> None:
     """Generate all metadata models into a single Python file using datamodel-codegen."""
-    codegen_container = (
-        dagger_client.container()
-        .from_(PYTHON_IMAGE)
-        .with_exec(["mkdir", "-p", "/generated_temp"], use_entrypoint=True)
-        .with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True)
-        .with_mounted_directory(
-            "/yaml", dagger_client.host().directory(yaml_dir_path, include=["*.yaml"])
-        )
-    )
+    generated_temp = temp_dir / "generated_temp"
+    generated_temp.mkdir(parents=True, exist_ok=True)
+
+    print("Running datamodel-codegen via uvx...", file=sys.stderr)

-    codegen_container = codegen_container.with_exec(
+    subprocess.run(
         [
+            "uvx",
+            "--from",
+            f"datamodel-code-generator=={DATAMODEL_CODEGEN_VERSION}",
             "datamodel-codegen",
             "--input",
-            "/yaml",
+            str(yaml_dir_path),
             "--output",
-            "/generated_temp",
+            str(generated_temp),
             "--disable-timestamp",
             "--enum-field-as-literal",
             "one",
@@ -97,53 +93,52 @@ async def generate_models_single_file(
             "deprecated",
             "deprecation_message",
         ],
-        use_entrypoint=True,
+        check=True,
     )

-    generated_files = await codegen_container.directory("/generated_temp").entries()
-
     future_imports = set()
     stdlib_imports = set()
     third_party_imports = set()
     classes_and_updates = []

-    for file_name in sorted(generated_files):
-        if file_name.endswith(".py") and file_name != "__init__.py":
-            content = await codegen_container.file(f"/generated_temp/{file_name}").contents()
-
-            lines = content.split("\n")
-            in_imports = True
-            in_relative_import_block = False
-            class_content = []
-
-            for line in lines:
-                if in_imports:
-                    if line.startswith("from __future__"):
-                        future_imports.add(line)
-                    elif (
-                        line.startswith("from datetime")
-                        or line.startswith("from enum")
-                        or line.startswith("from typing")
-                        or line.startswith("from uuid")
-                    ):
-                        stdlib_imports.add(line)
-                    elif line.startswith("from pydantic") or line.startswith("import "):
-                        third_party_imports.add(line)
-                    elif line.startswith("from ."):
-                        in_relative_import_block = True
-                        if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
-                            in_relative_import_block = False
-                    elif in_relative_import_block:
-                        if line.strip().endswith(")"):
-                            in_relative_import_block = False
-                    elif line.strip() and not line.startswith("#"):
-                        in_imports = False
-                        class_content.append(line)
-                else:
+    for py_file in sorted(generated_temp.glob("*.py")):
+        if py_file.name == "__init__.py":
+            continue
+
+        content = py_file.read_text()
+        lines = content.split("\n")
+        in_imports = True
+        in_relative_import_block = False
+        class_content = []
+
+        for line in lines:
+            if in_imports:
+                if line.startswith("from __future__"):
+                    future_imports.add(line)
+                elif (
+                    line.startswith("from datetime")
+                    or line.startswith("from enum")
+                    or line.startswith("from typing")
+                    or line.startswith("from uuid")
+                ):
+                    stdlib_imports.add(line)
+                elif line.startswith("from pydantic") or line.startswith("import "):
+                    third_party_imports.add(line)
+                elif line.startswith("from ."):
+                    in_relative_import_block = True
+                    if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
+                        in_relative_import_block = False
+                elif in_relative_import_block:
+                    if line.strip().endswith(")"):
+                        in_relative_import_block = False
+                elif line.strip() and not line.startswith("#"):
+                    in_imports = False
                     class_content.append(line)
+            else:
+                class_content.append(line)

-            if class_content:
-                classes_and_updates.append("\n".join(class_content))
+        if class_content:
+            classes_and_updates.append("\n".join(class_content))

     import_sections = []
     if future_imports:
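For readers following the merge step: each file that datamodel-codegen emits is scanned line by line, its import header is pooled into three sets (future, stdlib, third-party), and relative imports are dropped because every class ends up in the single models.py. A toy run of the same startswith rules, with made-up module names, illustrates where each line lands:

    # Illustration only, not part of the script: the classification rules above
    # applied to a hypothetical generated header.
    future_imports, stdlib_imports, third_party_imports = set(), set(), set()
    for line in [
        "from __future__ import annotations",
        "from typing import Optional",
        "from pydantic import BaseModel",
        "from .shared import SomeEnum",  # hypothetical relative import
    ]:
        if line.startswith("from __future__"):
            future_imports.add(line)
        elif line.startswith(("from datetime", "from enum", "from typing", "from uuid")):
            stdlib_imports.add(line)
        elif line.startswith("from pydantic") or line.startswith("import "):
            third_party_imports.add(line)
        # "from ." lines are skipped: relative imports are meaningless once all
        # models live in one file, so the merge drops them.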
@@ -177,22 +172,18 @@ async def generate_models_single_file(

     post_processed_content = "\n".join(filtered_lines)

-    codegen_container = codegen_container.with_new_file(
-        "/generated/models.py", contents=post_processed_content
-    )
-
-    await codegen_container.file("/generated/models.py").export(output_file_path)
+    output_file_path.write_text(post_processed_content)
+    print(f"Generated models: {output_file_path}", file=sys.stderr)


-def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: str) -> None:
+def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path) -> None:
     """Consolidate all YAML schemas into a single JSON schema file."""
     schemas = {}

     for yaml_file in yaml_dir_path.glob("*.yaml"):
         schema_name = yaml_file.stem
-        with yaml_file.open("r") as f:
-            schema_content = yaml.safe_load(f)
-            schemas[schema_name] = schema_content
+        schema_content = yaml.safe_load(yaml_file.read_text())
+        schemas[schema_name] = schema_content

     all_schema_names = set(schemas.keys())

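Since consolidate_yaml_schemas_to_json now takes plain Path arguments, it is easy to exercise on its own. A hedged usage sketch with a made-up schema file; because no ConnectorMetadataDefinitionV0 schema is present, it takes the simple-consolidation fallback shown in the next hunk:

    import json
    import tempfile
    from pathlib import Path

    with tempfile.TemporaryDirectory() as tmp:
        schema_dir = Path(tmp)
        (schema_dir / "ExampleSchema.yaml").write_text("title: ExampleSchema\ntype: object\n")
        out_path = schema_dir / "metadata_schema.json"
        consolidate_yaml_schemas_to_json(schema_dir, out_path)
        print(json.loads(out_path.read_text())["ExampleSchema"]["title"])  # ExampleSchema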
@@ -251,41 +242,40 @@ def fix_refs(obj, in_definition=False):

         consolidated = fix_refs(consolidated, in_definition=False)

-        Path(output_json_path).write_text(json.dumps(consolidated, indent=2))
+        output_json_path.write_text(json.dumps(consolidated, indent=2))
         print(f"Generated consolidated JSON schema: {output_json_path}", file=sys.stderr)
     else:
         print(
             "Warning: ConnectorMetadataDefinitionV0 not found, generating simple consolidation",
             file=sys.stderr,
         )
-        Path(output_json_path).write_text(json.dumps(schemas, indent=2))
+        output_json_path.write_text(json.dumps(schemas, indent=2))


-async def main():
-    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
-        print("Generating connector metadata models...", file=sys.stderr)
+def main():
+    print("Generating connector metadata models...", file=sys.stderr)

-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_path = Path(temp_dir)
-            schemas_dir = clone_schemas_from_github(temp_path)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        schemas_dir = clone_schemas_from_github(temp_path)

-            output_dir = Path(OUTPUT_DIR_PATH)
-            output_dir.mkdir(parents=True, exist_ok=True)
+        output_dir = Path(OUTPUT_DIR_PATH)
+        output_dir.mkdir(parents=True, exist_ok=True)

-            print("Generating single Python file with all models...", file=sys.stderr)
-            output_file = str(output_dir / "models.py")
-            await generate_models_single_file(
-                dagger_client=dagger_client,
-                yaml_dir_path=str(schemas_dir),
-                output_file_path=output_file,
-            )
+        print("Generating single Python file with all models...", file=sys.stderr)
+        output_file = output_dir / "models.py"
+        generate_models_single_file(
+            yaml_dir_path=schemas_dir,
+            output_file_path=output_file,
+            temp_dir=temp_path,
+        )

-            print("Generating consolidated JSON schema...", file=sys.stderr)
-            json_schema_file = str(output_dir / "metadata_schema.json")
-            consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)
+        print("Generating consolidated JSON schema...", file=sys.stderr)
+        json_schema_file = output_dir / "metadata_schema.json"
+        consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)

-            print("Connector metadata model generation complete!", file=sys.stderr)
+        print("Connector metadata model generation complete!", file=sys.stderr)


 if __name__ == "__main__":
-    anyio.run(main)
+    main()
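With the Dagger engine gone, the script is a plain synchronous entrypoint: it needs uvx and pyyaml on the machine (plus whatever clone_schemas_from_github uses to fetch the airbyte repo), and nothing else. A small post-run check of the two artifacts it writes, using the paths from OUTPUT_DIR_PATH above (illustrative, not part of the script):

    from pathlib import Path

    generated = Path("airbyte_cdk/test/models/connector_metadata/generated")
    for artifact in ("models.py", "metadata_schema.json"):
        path = generated / artifact
        print(f"{path}: {'ok' if path.exists() else 'MISSING'}")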