Skip to content

Commit f09963b

Browse files
committed
Vibe code schema support
1 parent 7e07911 commit f09963b

File tree

10 files changed

+148
-0
lines changed

10 files changed

+148
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
bundle:
  name: my_project

sync: {paths: []} # don't need to copy files

experimental:
  python:
    # "<module>:<function>" entry points resolved by the Python support runtime.
    resources:
      - "resources:load_resources"
    mutators:
      - "mutators:update_schema"

resources:
  schemas:
    my_schema_1:
      name: "My Schema"
      catalog_name: "my_catalog"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from dataclasses import replace

from databricks.bundles.core import schema_mutator
from databricks.bundles.catalog import Schema


@schema_mutator
def update_schema(schema: Schema) -> Schema:
    """Return a copy of *schema* with " (updated)" appended to its name.

    Registered via ``experimental.python.mutators``; must not mutate the
    input, so a new instance is produced with :func:`dataclasses.replace`.
    """
    # NOTE(review): name is presumably a plain string here (not a variable
    # reference); the assert documents that expectation for this fixture.
    assert isinstance(schema.name, str)

    renamed = f"{schema.name} (updated)"
    return replace(schema, name=renamed)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from databricks.bundles.core import Resources


def load_resources() -> Resources:
    """Programmatically define bundle resources.

    Referenced from ``experimental.python.resources`` in databricks.yml;
    returns a :class:`Resources` collection containing one schema defined
    as a plain dict.
    """
    out = Resources()

    out.add_schema(
        "my_schema_2",
        {
            "name": "My Schema (2)",
            "catalog_name": "my_catalog_2",
        },
    )

    return out
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Local = true
2+
Cloud = false # tests don't interact with APIs
3+
4+
[EnvMatrix]
5+
UV_ARGS = [
6+
# schemas are only supported in the latest version of the wheel
7+
"--with-requirements requirements-latest.txt --no-cache",
8+
]

experimental/python/databricks/bundles/core/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"VariableOrOptional",
1515
"job_mutator",
1616
"pipeline_mutator",
17+
"schema_mutator",
1718
"load_resources_from_current_package_module",
1819
"load_resources_from_module",
1920
"load_resources_from_modules",
@@ -39,6 +40,7 @@
3940
ResourceMutator,
4041
job_mutator,
4142
pipeline_mutator,
43+
schema_mutator,
4244
)
4345
from databricks.bundles.core._resources import Resources
4446
from databricks.bundles.core._variable import (

experimental/python/databricks/bundles/core/_resource_mutator.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from databricks.bundles.core._resource import Resource
77

88
if TYPE_CHECKING:
9+
from databricks.bundles.catalog._models.schema import Schema
910
from databricks.bundles.jobs._models.job import Job
1011
from databricks.bundles.pipelines._models.pipeline import Pipeline
1112

@@ -127,3 +128,35 @@ def my_pipeline_mutator(bundle: Bundle, pipeline: Pipeline) -> Pipeline:
127128
from databricks.bundles.pipelines._models.pipeline import Pipeline
128129

129130
return ResourceMutator(resource_type=Pipeline, function=function)
131+
132+
133+
@overload
def schema_mutator(
    function: Callable[[Bundle, "Schema"], "Schema"],
) -> ResourceMutator["Schema"]: ...


@overload
def schema_mutator(
    function: Callable[["Schema"], "Schema"],
) -> ResourceMutator["Schema"]: ...


def schema_mutator(function: Callable) -> ResourceMutator["Schema"]:
    """
    Decorator for defining a schema mutator. The decorated function must return a new
    instance of the schema with the desired changes, rather than mutating its input.

    Example:

    .. code-block:: python

        @schema_mutator
        def my_schema_mutator(bundle: Bundle, schema: Schema) -> Schema:
            return replace(schema, name="my_schema")

    :param function: Function that mutates a schema.
    """
    # Imported lazily so databricks.bundles.core doesn't import resource modules eagerly.
    from databricks.bundles.catalog._models.schema import Schema

    return ResourceMutator(resource_type=Schema, function=function)

experimental/python/databricks/bundles/core/_resource_type.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def all(cls) -> tuple["_ResourceType", ...]:
3131
# intentionally lazily load all resource types to avoid imports from databricks.bundles.core to
3232
# be imported in databricks.bundles.<resource_type>
3333

34+
from databricks.bundles.catalog._models.schema import Schema
3435
from databricks.bundles.jobs._models.job import Job
3536
from databricks.bundles.pipelines._models.pipeline import Pipeline
3637

@@ -45,4 +46,9 @@ def all(cls) -> tuple["_ResourceType", ...]:
4546
plural_name="pipelines",
4647
singular_name="pipeline",
4748
),
49+
_ResourceType(
50+
resource_type=Schema,
51+
plural_name="schemas",
52+
singular_name="schema",
53+
),
4854
)

experimental/python/databricks/bundles/core/_resources.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from databricks.bundles.core._transform import _transform
77

88
if TYPE_CHECKING:
9+
from databricks.bundles.catalog._models.schema import Schema, SchemaParam
910
from databricks.bundles.jobs._models.job import Job, JobParam
1011
from databricks.bundles.pipelines._models.pipeline import Pipeline, PipelineParam
1112

@@ -57,6 +58,7 @@ def load_resources(bundle: Bundle) -> Resources:
5758
def __init__(self):
5859
self._jobs = dict[str, "Job"]()
5960
self._pipelines = dict[str, "Pipeline"]()
61+
self._schemas = dict[str, "Schema"]()
6062
self._locations = dict[tuple[str, ...], Location]()
6163
self._diagnostics = Diagnostics()
6264

@@ -68,6 +70,10 @@ def jobs(self) -> dict[str, "Job"]:
6870
def pipelines(self) -> dict[str, "Pipeline"]:
6971
return self._pipelines
7072

73+
@property
74+
def schemas(self) -> dict[str, "Schema"]:
75+
return self._schemas
76+
7177
@property
7278
def diagnostics(self) -> Diagnostics:
7379
"""
@@ -91,6 +97,7 @@ def add_resource(
9197
:param location: optional location of the resource in the source code
9298
"""
9399

100+
from databricks.bundles.catalog import Schema
94101
from databricks.bundles.jobs import Job
95102
from databricks.bundles.pipelines import Pipeline
96103

@@ -101,6 +108,8 @@ def add_resource(
101108
self.add_job(resource_name, resource, location=location)
102109
case Pipeline():
103110
self.add_pipeline(resource_name, resource, location=location)
111+
case Schema():
112+
self.add_schema(resource_name, resource, location=location)
104113
case _:
105114
raise ValueError(f"Unsupported resource type: {type(resource)}")
106115

@@ -168,6 +177,38 @@ def add_pipeline(
168177

169178
self._pipelines[resource_name] = pipeline
170179

180+
def add_schema(
181+
self,
182+
resource_name: str,
183+
schema: "SchemaParam",
184+
*,
185+
location: Optional[Location] = None,
186+
) -> None:
187+
"""
188+
Adds a schema to the collection of resources. Resource name must be unique across all schemas.
189+
190+
:param resource_name: unique identifier for the schema
191+
:param schema: the schema to add, can be Schema or dict
192+
:param location: optional location of the schema in the source code
193+
"""
194+
from databricks.bundles.catalog import Schema
195+
196+
schema = _transform(Schema, schema)
197+
path = ("resources", "schemas", resource_name)
198+
location = location or Location.from_stack_frame(depth=1)
199+
200+
if self._schemas.get(resource_name):
201+
self.add_diagnostic_error(
202+
msg=f"Duplicate resource name '{resource_name}' for a schema. Resource names must be unique.",
203+
location=location,
204+
path=path,
205+
)
206+
else:
207+
if location:
208+
self.add_location(path, location)
209+
210+
self._schemas[resource_name] = schema
211+
171212
def add_location(self, path: tuple[str, ...], location: Location) -> None:
172213
"""
173214
Associate source code location with a path in the bundle configuration.
@@ -244,6 +285,9 @@ def add_resources(self, other: "Resources") -> None:
244285
for name, pipeline in other.pipelines.items():
245286
self.add_pipeline(name, pipeline)
246287

288+
for name, schema in other.schemas.items():
289+
self.add_schema(name, schema)
290+
247291
for path, location in other._locations.items():
248292
self.add_location(path, location)
249293

experimental/python/databricks_tests/core/test_resources.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
ResourceMutator,
1010
job_mutator,
1111
pipeline_mutator,
12+
schema_mutator,
1213
)
1314
from databricks.bundles.core._resource_type import _ResourceType
15+
from databricks.bundles.catalog._models.schema import Schema
1416
from databricks.bundles.jobs._models.job import Job
1517
from databricks.bundles.pipelines._models.pipeline import Pipeline
1618

@@ -43,6 +45,15 @@ class TestCase:
4345
),
4446
resource_types[Pipeline],
4547
),
48+
(
49+
TestCase(
50+
add_resource=Resources.add_schema,
51+
dict_example={"catalog_name": "my_catalog", "name": "my_schema"},
52+
dataclass_example=Schema(catalog_name="my_catalog", name="my_schema"),
53+
mutator=schema_mutator,
54+
),
55+
resource_types[Schema],
56+
),
4657
]
4758
test_case_ids = [tpe.plural_name for _, tpe in test_cases]
4859

0 commit comments

Comments
 (0)