Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Fixes

- Fix `hard_deletes: invalidate` incorrectly invalidating active records in snapshots (thanks @Zurbste!) ([#1281](https://github.com/databricks/dbt-databricks/issues/1281))
- Fix serverless Python model environment configuration: use `environment_version` instead of deprecated `client` field. Users can now specify custom environment versions via `python_job_config.environments`. ([#1286](https://github.com/databricks/dbt-databricks/pull/1286))

### Under the Hood

Expand Down
7 changes: 7 additions & 0 deletions dbt/adapters/databricks/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from databricks.sdk.service.iam import AccessControlRequest
from databricks.sdk.service.jobs import (
JobAccessControlRequest,
JobEnvironment,
JobSettings,
QueueSettings,
Run,
Expand Down Expand Up @@ -530,6 +531,12 @@ def _submit_job_to_databricks(
# Add any additional job settings
submission_params.update(additional_job_settings)

# Handle environments - convert dicts to JobEnvironment objects
if "environments" in submission_params:
submission_params["environments"] = [
JobEnvironment.from_dict(env) for env in submission_params["environments"]
]

# Filter out parameters that the Databricks SDK doesn't expect
# The SDK submit() method doesn't accept 'name' or 'run_name' in the request body
filtered_params = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def compile(self, path: str) -> PythonJobDetails:
additional_job_config["environments"] = [
{
"environment_key": self.environment_key,
"spec": {"client": "2", "dependencies": self.environment_deps},
"spec": {"environment_version": "4", "dependencies": self.environment_deps},
}
]
job_spec.update(self.cluster_spec)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ def models(self):


@pytest.mark.python
# @pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster")
@pytest.mark.skip("Not available in Databricks yet")
@pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster")
class TestServerlessClusterWithEnvironment(BasePythonModelTests):
@pytest.fixture(scope="class")
def models(self):
Expand Down
42 changes: 42 additions & 0 deletions tests/unit/python/test_python_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,45 @@ def test_python_job_config__extra_values(self):
}
job_config = PythonJobConfig(**config).dict()
assert job_config == {"name": "name", "foo": "bar"}

def test_python_job_config__environments_passed_through(self):
"""Test that environments field is properly passed through to dict().

See GitHub issue #1277: Users need to be able to specify environments
to control serverless version.
"""
environments = [
{
"environment_key": "default",
"spec": {"environment_version": "3"},
}
]
config = {
"name": "test_job",
"environments": environments,
}
job_config = PythonJobConfig(**config).dict()
assert job_config == {"name": "test_job", "environments": environments}

def test_python_job_config__environments_with_dependencies(self):
"""Test environments with dependencies are properly passed through.

See GitHub issue #1277: Users should be able to specify full environment
configuration including version and dependencies.
"""
environments = [
{
"environment_key": "custom_env",
"spec": {
"environment_version": "3",
"dependencies": ["pandas", "numpy"],
},
}
]
config = {
"name": "test_job",
"environments": environments,
}
job_config = PythonJobConfig(**config).dict()
assert job_config["environments"] == environments
assert job_config["environments"][0]["spec"]["environment_version"] == "3"
93 changes: 92 additions & 1 deletion tests/unit/python/test_python_job_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,98 @@ def test_compile__nonempty_configs(
"environments": [
{
"environment_key": environment_key,
"spec": {"client": "2", "dependencies": ["requests"]},
"spec": {"environment_version": "4", "dependencies": ["requests"]},
}
],
}

def test_compile__uses_environment_version_not_deprecated_client(
self, client, permission_builder, parsed_model, run_name, environment_key
):
"""Test that environment_version is used instead of deprecated 'client' field.

See GitHub issue #1277: The Databricks API deprecated the 'client' field
in favor of 'environment_version'.
"""
parsed_model.config.packages = []
parsed_model.config.index_url = None
parsed_model.config.python_job_config.dict.return_value = {}

permission_builder.build_job_permissions.return_value = []
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
details = compiler.compile("path")

# Verify environment_version is used, not client
env_spec = details.additional_job_config["environments"][0]["spec"]
assert "environment_version" in env_spec, (
"Should use 'environment_version' not deprecated 'client'"
)
assert "client" not in env_spec, "Should not use deprecated 'client' field"

def test_compile__respects_user_provided_environments(
self, client, permission_builder, parsed_model, run_name
):
"""Test that user-provided environments in python_job_config are respected.

See GitHub issue #1277: Users should be able to specify their own
environments configuration to control serverless version.
"""
parsed_model.config.packages = []
parsed_model.config.index_url = None
parsed_model.config.environment_key = "custom_env"
parsed_model.config.environment_dependencies = [] # No auto-generated deps

# User provides their own environments configuration
user_environments = [
{
"environment_key": "custom_env",
"spec": {"environment_version": "3"},
}
]
parsed_model.config.python_job_config.dict.return_value = {
"environments": user_environments
}

permission_builder.build_job_permissions.return_value = []
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
details = compiler.compile("path")

# User-provided environments should be preserved
assert details.additional_job_config["environments"] == user_environments
# The environment_key should still be set in job_spec
assert details.job_spec["environment_key"] == "custom_env"

def test_compile__user_environments_override_auto_generated(
self, client, permission_builder, parsed_model, run_name
):
"""Test that user-provided environments override auto-generated ones.

See GitHub issue #1277: Even when environment_dependencies are set,
user-provided environments should take precedence.
"""
parsed_model.config.packages = []
parsed_model.config.index_url = None
parsed_model.config.environment_key = "my_env"
parsed_model.config.environment_dependencies = ["pandas", "numpy"] # Would trigger auto-gen

# User provides their own environments with specific version
user_environments = [
{
"environment_key": "my_env",
"spec": {"environment_version": "3", "dependencies": ["requests"]},
}
]
parsed_model.config.python_job_config.dict.return_value = {
"environments": user_environments
}

permission_builder.build_job_permissions.return_value = []
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
details = compiler.compile("path")

# User-provided environments should override, not be merged
assert details.additional_job_config["environments"] == user_environments
# Should have user's dependencies, not auto-generated ones
assert details.additional_job_config["environments"][0]["spec"]["dependencies"] == [
"requests"
]
Loading