Skip to content

Commit 5b9124a

Browse files
Remove dagger dependencies and replace with uv scripts
- Replace dagger containerized execution with uv scripts using inline dependencies
- Use subprocess calls instead of async dagger operations
- Maintain identical datamodel-codegen command line arguments
- Keep same post-processing logic for pydantic imports and deprecated fields
- Update shell script to use 'uv run' instead of dagger installation

Co-Authored-By: AJ Steers <[email protected]>
1 parent bd615ad commit 5b9124a

File tree

2 files changed

+79
-67
lines changed

2 files changed

+79
-67
lines changed
Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,7 @@
11
#!/usr/bin/env bash
22

3-
# We need to run this script in a docker container because we need to use a
4-
# specific version of datamodel-codegen that generates pydantic v1 models (correctly).
5-
# The newer datamodel-codegen's "pydantic v1" models are different than those v1 models
6-
# generated by the older version of datamodel-codegen.
3+
# Generate component manifest files using datamodel-codegen.
74

85
set -e
96

10-
pip install dagger-io==0.13.3
11-
python bin/generate_component_manifest_files.py
7+
uv run bin/generate_component_manifest_files.py

bin/generate_component_manifest_files.py

Lines changed: 77 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,29 @@
1+
#!/usr/bin/env python3
2+
# /// script
3+
# dependencies = [
4+
# "datamodel-code-generator==0.26.3",
5+
# "PyYAML>=6.0.1",
6+
# ]
7+
# ///
8+
19
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
210

311
import json
12+
import os
413
import re
14+
import shutil
15+
import subprocess
516
import sys
17+
import tempfile
618
from glob import glob
719
from pathlib import Path
820

9-
import anyio
10-
import dagger
1121
import yaml
1222

13-
PYTHON_IMAGE = "python:3.10"
1423
LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative"
1524
LOCAL_OUTPUT_DIR_PATH = "airbyte_cdk/sources/declarative/models"
1625

1726

18-
PIP_DEPENDENCIES = [
19-
"datamodel_code_generator==0.26.3",
20-
]
21-
22-
2327
def get_all_yaml_files_without_ext() -> list[str]:
2428
return [Path(f).stem for f in glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml")]
2529

@@ -119,15 +123,18 @@ def replace_base_model_for_classes_with_deprecated_fields(post_processed_content
119123
return post_processed_content
120124

121125

122-
async def post_process_codegen(codegen_container: dagger.Container):
123-
codegen_container = codegen_container.with_exec(
124-
["mkdir", "/generated_post_processed"], use_entrypoint=True
125-
)
126-
for generated_file in await codegen_container.directory("/generated").entries():
126+
def post_process_codegen(generated_dir: str, post_processed_dir: str):
127+
"""Post-process generated files to fix pydantic imports and deprecated fields."""
128+
os.makedirs(post_processed_dir, exist_ok=True)
129+
130+
for generated_file in os.listdir(generated_dir):
127131
if generated_file.endswith(".py"):
128-
original_content = await codegen_container.file(
129-
f"/generated/{generated_file}"
130-
).contents()
132+
input_path = os.path.join(generated_dir, generated_file)
133+
output_path = os.path.join(post_processed_dir, generated_file)
134+
135+
with open(input_path, "r") as f:
136+
original_content = f.read()
137+
131138
# the space before _parameters is intentional to avoid replacing things like `request_parameters:` with `requestparameters:`
132139
post_processed_content = original_content.replace(
133140
" _parameters:", " parameters:"
@@ -137,56 +144,65 @@ async def post_process_codegen(codegen_container: dagger.Container):
137144
post_processed_content
138145
)
139146

140-
codegen_container = codegen_container.with_new_file(
141-
f"/generated_post_processed/{generated_file}", contents=post_processed_content
142-
)
143-
return codegen_container
147+
with open(output_path, "w") as f:
148+
f.write(post_processed_content)
144149

145150

146-
async def main():
151+
def main():
147152
generate_json_schema()
148153
init_module_content = generate_init_module_content()
149154

150-
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
151-
codegen_container = (
152-
dagger_client.container()
153-
.from_(PYTHON_IMAGE)
154-
.with_exec(["mkdir", "/generated"], use_entrypoint=True)
155-
.with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True)
156-
.with_mounted_directory(
157-
"/yaml", dagger_client.host().directory(LOCAL_YAML_DIR_PATH, include=["*.yaml"])
158-
)
159-
.with_new_file("/generated/__init__.py", contents=init_module_content)
160-
)
161-
for yaml_file in get_all_yaml_files_without_ext():
162-
codegen_container = codegen_container.with_exec(
163-
[
164-
"datamodel-codegen",
165-
"--input",
166-
f"/yaml/{yaml_file}.yaml",
167-
"--output",
168-
f"/generated/{yaml_file}.py",
169-
"--disable-timestamp",
170-
"--enum-field-as-literal",
171-
"one",
172-
"--set-default-enum-member",
173-
"--use-double-quotes",
174-
"--remove-special-field-name-prefix",
175-
# allow usage of the extra key such as `deprecated`, etc.
176-
"--field-extra-keys",
177-
# account the `deprecated` flag provided for the field.
178-
"deprecated",
179-
# account the `deprecation_message` provided for the field.
180-
"deprecation_message",
181-
],
182-
use_entrypoint=True,
183-
)
155+
with tempfile.TemporaryDirectory() as temp_dir:
156+
generated_dir = os.path.join(temp_dir, "generated")
157+
post_processed_dir = os.path.join(temp_dir, "generated_post_processed")
184158

185-
await (
186-
(await post_process_codegen(codegen_container))
187-
.directory("/generated_post_processed")
188-
.export(LOCAL_OUTPUT_DIR_PATH)
189-
)
159+
os.makedirs(generated_dir, exist_ok=True)
190160

161+
init_file_path = os.path.join(generated_dir, "__init__.py")
162+
with open(init_file_path, "w") as f:
163+
f.write(init_module_content)
191164

192-
anyio.run(main)
165+
for yaml_file in get_all_yaml_files_without_ext():
166+
input_yaml = os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml")
167+
output_py = os.path.join(generated_dir, f"{yaml_file}.py")
168+
169+
cmd = [
170+
"datamodel-codegen",
171+
"--input",
172+
input_yaml,
173+
"--output",
174+
output_py,
175+
"--disable-timestamp",
176+
"--enum-field-as-literal",
177+
"one",
178+
"--set-default-enum-member",
179+
"--use-double-quotes",
180+
"--remove-special-field-name-prefix",
181+
# allow usage of the extra key such as `deprecated`, etc.
182+
"--field-extra-keys",
183+
# account the `deprecated` flag provided for the field.
184+
"deprecated",
185+
# account the `deprecation_message` provided for the field.
186+
"deprecation_message",
187+
]
188+
189+
try:
190+
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
191+
print(f"Generated {output_py}")
192+
except subprocess.CalledProcessError as e:
193+
print(f"Error generating {output_py}: {e}")
194+
print(f"stdout: {e.stdout}")
195+
print(f"stderr: {e.stderr}")
196+
sys.exit(1)
197+
198+
post_process_codegen(generated_dir, post_processed_dir)
199+
200+
if os.path.exists(LOCAL_OUTPUT_DIR_PATH):
201+
shutil.rmtree(LOCAL_OUTPUT_DIR_PATH)
202+
shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH)
203+
204+
print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}")
205+
206+
207+
if __name__ == "__main__":
208+
main()

0 commit comments

Comments
 (0)