1+ #!/usr/bin/env python3
2+ #
3+ # Usage:
4+ # > uv run bin/generate_component_manifest_files.py
5+ #
6+ # /// script
7+ # dependencies = [
8+ # "datamodel-code-generator==0.26.3",
9+ # "PyYAML>=6.0.1",
10+ # ]
11+ # ///
12+
113# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
214
315import json
16+ import os
417import re
18+ import shutil
19+ import subprocess
520import sys
21+ import tempfile
622from glob import glob
723from pathlib import Path
824
9- import anyio
10- import dagger
1125import yaml
1226
13- PYTHON_IMAGE = "python:3.10"
# Directory that is globbed for the `*.yaml` schemas fed to the code generator.
LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative"
# Directory the post-processed generated modules are copied into.
LOCAL_OUTPUT_DIR_PATH = f"{LOCAL_YAML_DIR_PATH}/models"
1629
1730
18- PIP_DEPENDENCIES = [
19- "datamodel_code_generator==0.26.3" ,
20- ]
21-
22-
def get_all_yaml_files_without_ext() -> list[str]:
    """Return the extension-less names of the `*.yaml` files in LOCAL_YAML_DIR_PATH.

    The names are sorted because `glob` yields entries in an arbitrary,
    filesystem-dependent order; a stable order keeps anything derived from
    this list reproducible between runs and machines.
    """
    return sorted(Path(f).stem for f in glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml"))
2533
@@ -119,15 +127,18 @@ def replace_base_model_for_classes_with_deprecated_fields(post_processed_content
119127 return post_processed_content
120128
121129
122- async def post_process_codegen (codegen_container : dagger . Container ):
123- codegen_container = codegen_container . with_exec (
124- [ "mkdir" , "/generated_post_processed" ], use_entrypoint = True
125- )
126- for generated_file in await codegen_container . directory ( "/generated" ). entries ( ):
130+ def post_process_codegen (generated_dir : str , post_processed_dir : str ):
131+ """Post-process generated files to fix pydantic imports and deprecated fields."""
132+ os . makedirs ( post_processed_dir , exist_ok = True )
133+
134+ for generated_file in os . listdir ( generated_dir ):
127135 if generated_file .endswith (".py" ):
128- original_content = await codegen_container .file (
129- f"/generated/{ generated_file } "
130- ).contents ()
136+ input_path = os .path .join (generated_dir , generated_file )
137+ output_path = os .path .join (post_processed_dir , generated_file )
138+
139+ with open (input_path , "r" ) as f :
140+ original_content = f .read ()
141+
131142 # the space before _parameters is intentional to avoid replacing things like `request_parameters:` with `requestparameters:`
132143 post_processed_content = original_content .replace (
133144 " _parameters:" , " parameters:"
@@ -137,56 +148,65 @@ async def post_process_codegen(codegen_container: dagger.Container):
137148 post_processed_content
138149 )
139150
140- codegen_container = codegen_container .with_new_file (
141- f"/generated_post_processed/{ generated_file } " , contents = post_processed_content
142- )
143- return codegen_container
151+ with open (output_path , "w" ) as f :
152+ f .write (post_processed_content )
144153
145154
def _run_datamodel_codegen(input_yaml: str, output_py: str) -> None:
    """Run `datamodel-codegen` on one YAML schema, exiting the script with status 1 on failure."""
    cmd = [
        "datamodel-codegen",
        "--input",
        input_yaml,
        "--output",
        output_py,
        "--disable-timestamp",
        "--enum-field-as-literal",
        "one",
        "--set-default-enum-member",
        "--use-double-quotes",
        "--remove-special-field-name-prefix",
        # allow usage of the extra key such as `deprecated`, etc.
        "--field-extra-keys",
        # account the `deprecated` flag provided for the field.
        "deprecated",
        # account the `deprecation_message` provided for the field.
        "deprecation_message",
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # The executable itself is missing (e.g. the script was not started via `uv run`).
        print(
            f"Error generating {output_py}: `datamodel-codegen` executable not found on PATH",
            file=sys.stderr,
        )
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        # Diagnostics go to stderr so they are not mixed with the progress output.
        print(f"Error generating {output_py}: {e}", file=sys.stderr)
        print(f"stdout: {e.stdout}", file=sys.stderr)
        print(f"stderr: {e.stderr}", file=sys.stderr)
        sys.exit(1)

    print(f"Generated {output_py}")


def main():
    """Generate the declarative model modules from the YAML schemas.

    Steps: refresh the JSON schema, write the generated package's
    `__init__.py`, run `datamodel-codegen` once per YAML file into a temporary
    directory, post-process the results, and copy them into
    LOCAL_OUTPUT_DIR_PATH. Exits with status 1 if any codegen run fails.
    """
    generate_json_schema()
    init_module_content = generate_init_module_content()

    with tempfile.TemporaryDirectory() as temp_dir:
        generated_dir = os.path.join(temp_dir, "generated")
        post_processed_dir = os.path.join(temp_dir, "generated_post_processed")

        os.makedirs(generated_dir, exist_ok=True)

        init_file_path = os.path.join(generated_dir, "__init__.py")
        with open(init_file_path, "w") as f:
            f.write(init_module_content)

        for yaml_file in get_all_yaml_files_without_ext():
            _run_datamodel_codegen(
                os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml"),
                os.path.join(generated_dir, f"{yaml_file}.py"),
            )

        post_process_codegen(generated_dir, post_processed_dir)

        # Overlay the generated files onto the output package instead of deleting
        # the package first: removing the directory would also destroy any file in
        # it that this script does not generate. Files with the same name are
        # overwritten; everything else is left untouched.
        shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH, dirs_exist_ok=True)

    print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}")
209+
210+
# Run the generator only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments