Skip to content

Commit cf7e107

Browse files
authored
[Python] Generate each resource in separate namespace (#3428)
## Changes Change code generation to make each namespace (e.g., `databricks.bundles.jobs`) isolated from one another, following the design of Databricks SDK v1. Since we already exported all necessary classes from namespace packages, imports don't change. There are 2 copies of the cluster-related classes, and they can't be used interchangeably anymore when constructing jobs and pipelines. One can be converted into another through dictionaries: ```python import databricks.bundles.jobs as jobs import databricks.bundles.pipelines as pipelines job_init_scripts = jobs.InitScriptInfo(...) pipeline_init_scripts = pipelines.InitScriptInfo.from_dict(job_init_scripts.to_dict()) ``` ## Why It decouples resources from each other and allows us to evolve their schemas independently ## Tests Updating existing tests and reviewing codegen diff
1 parent 06904a0 commit cf7e107

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

65 files changed

+1515
-329
lines changed

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## Release v0.265.0
44

55
### Notable Changes
6+
* Separate generated classes between jobs and pipelines in Python support ([#3428](https://github.com/databricks/cli/pull/3428))
67

78
### Dependency updates
89
* Upgrade TF provider to 1.87.0 ([#3430](https://github.com/databricks/cli/pull/3430))

experimental/python/codegen/codegen/generated_dataclass.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,13 @@ class GeneratedDataclass:
134134

135135

136136
def generate_field(
137+
namespace: str,
137138
field_name: str,
138139
prop: Property,
139140
is_required: bool,
140141
) -> GeneratedField:
141-
field_type = generate_type(prop.ref, is_param=False)
142-
param_type = generate_type(prop.ref, is_param=True)
142+
field_type = generate_type(namespace, prop.ref, is_param=False)
143+
param_type = generate_type(namespace, prop.ref, is_param=True)
143144

144145
field_type = variable_or_type(field_type, is_required=is_required)
145146
param_type = variable_or_type(param_type, is_required=is_required)
@@ -255,10 +256,11 @@ def variable_or_dict_type(element_type: GeneratedType) -> GeneratedType:
255256
)
256257

257258

258-
def generate_type(ref: str, is_param: bool) -> GeneratedType:
259+
def generate_type(namespace: str, ref: str, is_param: bool) -> GeneratedType:
259260
if ref.startswith("#/$defs/slice/"):
260261
element_ref = ref.replace("#/$defs/slice/", "#/$defs/")
261262
element_type = generate_type(
263+
namespace=namespace,
262264
ref=element_ref,
263265
is_param=is_param,
264266
)
@@ -273,7 +275,7 @@ def generate_type(ref: str, is_param: bool) -> GeneratedType:
273275
return dict_type()
274276

275277
class_name = packages.get_class_name(ref)
276-
package = packages.get_package(ref)
278+
package = packages.get_package(namespace, ref)
277279

278280
if is_param and package:
279281
class_name += "Param"
@@ -293,20 +295,24 @@ def resource_type() -> GeneratedType:
293295
)
294296

295297

296-
def generate_dataclass(schema_name: str, schema: Schema) -> GeneratedDataclass:
298+
def generate_dataclass(
299+
namespace: str,
300+
schema_name: str,
301+
schema: Schema,
302+
) -> GeneratedDataclass:
297303
print(f"Generating dataclass for {schema_name}")
298304

299305
fields = list[GeneratedField]()
300306
class_name = packages.get_class_name(schema_name)
301307

302308
for name, prop in schema.properties.items():
303309
is_required = name in schema.required
304-
field = generate_field(name, prop, is_required=is_required)
310+
field = generate_field(namespace, name, prop, is_required=is_required)
305311

306312
fields.append(field)
307313

308314
extends = []
309-
package = packages.get_package(schema_name)
315+
package = packages.get_package(namespace, schema_name)
310316

311317
assert package
312318

experimental/python/codegen/codegen/generated_dataclass_patch.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,11 @@ class Bar:
3737

3838
# see also _append_resolve_recursive_imports
3939

40-
models["jobs.ForEachTask"] = _quote_recursive_references_for_model(
41-
models["jobs.ForEachTask"],
42-
references={"Task", "TaskParam"},
43-
)
40+
if "jobs.ForEachTask" in models:
41+
models["jobs.ForEachTask"] = _quote_recursive_references_for_model(
42+
models["jobs.ForEachTask"],
43+
references={"Task", "TaskParam"},
44+
)
4445

4546

4647
def _quote_recursive_references_for_model(

experimental/python/codegen/codegen/generated_enum.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ class GeneratedEnum:
1717
experimental: bool
1818

1919

20-
def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum:
20+
def generate_enum(namespace: str, schema_name: str, schema: Schema) -> GeneratedEnum:
2121
assert schema.enum
2222

2323
class_name = packages.get_class_name(schema_name)
24-
package = packages.get_package(schema_name)
24+
package = packages.get_package(namespace, schema_name)
2525
values = {}
2626

2727
assert package

experimental/python/codegen/codegen/generated_imports.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ def append_enum_imports(
1111
enums: dict[str, GeneratedEnum],
1212
exclude_packages: list[str],
1313
) -> None:
14-
for schema_name in enums.keys():
15-
package = packages.get_package(schema_name)
16-
class_name = packages.get_class_name(schema_name)
14+
for generated in enums.values():
15+
package = generated.package
16+
class_name = generated.class_name
1717

1818
if package in exclude_packages:
1919
continue
@@ -26,9 +26,9 @@ def append_dataclass_imports(
2626
dataclasses: dict[str, GeneratedDataclass],
2727
exclude_packages: list[str],
2828
) -> None:
29-
for schema_name in dataclasses.keys():
30-
package = packages.get_package(schema_name)
31-
class_name = packages.get_class_name(schema_name)
29+
for generated in dataclasses.values():
30+
package = generated.package
31+
class_name = generated.class_name
3232

3333
if package in exclude_packages:
3434
continue

experimental/python/codegen/codegen/main.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,21 @@ def main(output: str):
2828
schemas = _remove_deprecated_fields(schemas)
2929
schemas = _remove_unused_schemas(packages.RESOURCE_TYPES, schemas)
3030

31-
dataclasses, enums = _generate_code(schemas)
32-
33-
generated_dataclass_patch.reorder_required_fields(dataclasses)
34-
generated_dataclass_patch.quote_recursive_references(dataclasses)
31+
# each resource has own namespace and is generated separately so
32+
# that there are no dependencies between namespaces as in Databricks SDK v1
33+
for resource, namespace in packages.RESOURCE_NAMESPACE.items():
34+
# only generate code for schemas used directly or transitively by resource
35+
reachable = _collect_reachable_schemas([resource], schemas)
36+
reachable_schemas = {k: v for k, v in schemas.items() if k in reachable}
3537

36-
_write_code(dataclasses, enums, output)
38+
dataclasses, enums = _generate_code(namespace, reachable_schemas)
3739

38-
for resource in packages.RESOURCE_TYPES:
39-
reachable = _collect_reachable_schemas([resource], schemas)
40+
generated_dataclass_patch.reorder_required_fields(dataclasses)
41+
generated_dataclass_patch.quote_recursive_references(dataclasses)
4042

41-
resource_dataclasses = {k: v for k, v in dataclasses.items() if k in reachable}
42-
resource_enums = {k: v for k, v in enums.items() if k in reachable}
43+
_write_code(dataclasses, enums, output)
4344

44-
_write_exports(resource, resource_dataclasses, resource_enums, output)
45+
_write_exports(namespace, dataclasses, enums, output)
4546

4647

4748
def _transitively_mark_deprecated_and_private(
@@ -95,18 +96,21 @@ def _remove_deprecated_fields(
9596

9697

9798
def _generate_code(
99+
namespace: str,
98100
schemas: dict[str, openapi.Schema],
99101
) -> tuple[dict[str, GeneratedDataclass], dict[str, GeneratedEnum]]:
100102
dataclasses = {}
101103
enums = {}
102104

103105
for schema_name, schema in schemas.items():
104106
if schema.type == openapi.SchemaType.OBJECT:
105-
generated = generated_dataclass.generate_dataclass(schema_name, schema)
107+
generated = generated_dataclass.generate_dataclass(
108+
namespace, schema_name, schema
109+
)
106110

107111
dataclasses[schema_name] = generated
108112
elif schema.type == openapi.SchemaType.STRING:
109-
generated = generated_enum.generate_enum(schema_name, schema)
113+
generated = generated_enum.generate_enum(namespace, schema_name, schema)
110114

111115
enums[schema_name] = generated
112116
else:
@@ -116,7 +120,7 @@ def _generate_code(
116120

117121

118122
def _write_exports(
119-
root: str,
123+
namespace: str,
120124
dataclasses: dict[str, GeneratedDataclass],
121125
enums: dict[str, GeneratedEnum],
122126
output: str,
@@ -148,14 +152,11 @@ def _write_exports(
148152
generated_imports.append_enum_imports(b, enums, exclude_packages=[])
149153

150154
# FIXME should be better generalized
151-
if root == "resources.Job":
155+
if namespace == "jobs":
152156
_append_resolve_recursive_imports(b)
153157

154-
root_package = packages.get_package(root)
155-
assert root_package
156-
157-
# transform databricks.bundles.jobs._models.job -> databricks/bundles/jobs
158-
package_path = Path(root_package.replace(".", "/")).parent.parent
158+
root_package = packages.get_root_package(namespace)
159+
package_path = Path(root_package.replace(".", "/"))
159160

160161
source_path = Path(output) / package_path / "__init__.py"
161162
source_path.parent.mkdir(exist_ok=True, parents=True)

experimental/python/codegen/codegen/packages.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,13 @@
11
import re
22
from typing import Optional
33

4-
RESOURCE_NAMESPACE_OVERRIDE = {
4+
# All supported resource types and their namespace
5+
RESOURCE_NAMESPACE = {
56
"resources.Job": "jobs",
67
"resources.Pipeline": "pipelines",
7-
"resources.JobPermission": "jobs",
8-
"resources.JobPermissionLevel": "jobs",
9-
"resources.PipelinePermission": "pipelines",
10-
"resources.PipelinePermissionLevel": "pipelines",
118
}
129

13-
# All supported resource types
14-
RESOURCE_TYPES = [
15-
"resources.Job",
16-
"resources.Pipeline",
17-
]
10+
RESOURCE_TYPES = list(RESOURCE_NAMESPACE.keys())
1811

1912
# Namespaces to load from OpenAPI spec.
2013
#
@@ -72,7 +65,11 @@ def should_load_ref(ref: str) -> bool:
7265
return name in PRIMITIVES
7366

7467

75-
def get_package(ref: str) -> Optional[str]:
68+
def get_root_package(namespace: str) -> str:
69+
return f"databricks.bundles.{namespace}"
70+
71+
72+
def get_package(namespace: str, ref: str) -> Optional[str]:
7673
"""
7774
Returns Python package for a given OpenAPI ref.
7875
Returns None for builtin types.
@@ -83,11 +80,7 @@ def get_package(ref: str) -> Optional[str]:
8380
if full_name in PRIMITIVES:
8481
return None
8582

86-
[namespace, name] = full_name.split(".")
87-
88-
if override := RESOURCE_NAMESPACE_OVERRIDE.get(full_name):
89-
namespace = override
90-
83+
[_, name] = full_name.split(".")
9184
package_name = re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()
9285

93-
return f"databricks.bundles.{namespace}._models.{package_name}"
86+
return f"{get_root_package(namespace)}._models.{package_name}"

experimental/python/codegen/codegen_tests/test_generated_dataclass.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
def test_generate_type_string():
1616
generated_type = generate_type(
17-
"#/$defs/string",
17+
namespace="jobs",
18+
ref="#/$defs/string",
1819
is_param=False,
1920
)
2021

@@ -27,7 +28,8 @@ def test_generate_type_string():
2728

2829
def test_generate_type_dict():
2930
generated_type = generate_type(
30-
"#/$defs/map/string",
31+
namespace="jobs",
32+
ref="#/$defs/map/string",
3133
is_param=False,
3234
)
3335

@@ -36,6 +38,7 @@ def test_generate_type_dict():
3638

3739
def test_generate_dataclass():
3840
generated = generate_dataclass(
41+
namespace="bananas",
3942
schema_name="jobs.Task",
4043
schema=Schema(
4144
type=SchemaType.OBJECT,
@@ -52,7 +55,7 @@ def test_generate_dataclass():
5255

5356
assert generated == GeneratedDataclass(
5457
class_name="Task",
55-
package="databricks.bundles.jobs._models.task",
58+
package="databricks.bundles.bananas._models.task",
5659
description="task description",
5760
extends=[],
5861
fields=[

experimental/python/codegen/codegen_tests/test_generated_enum.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
def test_generate_enum():
66
generated = generate_enum(
7+
namespace="bananas",
78
schema_name="jobs.MyEnum",
89
schema=Schema(
910
enum=["myEnumValue"],
@@ -14,7 +15,7 @@ def test_generate_enum():
1415

1516
assert generated == GeneratedEnum(
1617
class_name="MyEnum",
17-
package="databricks.bundles.jobs._models.my_enum",
18+
package="databricks.bundles.bananas._models.my_enum",
1819
values={"MY_ENUM_VALUE": "myEnumValue"},
1920
description="enum description",
2021
experimental=False,

0 commit comments

Comments (0)