Skip to content

Commit 4878f56

Browse files
authored
fix: Serverless Python model environment version configuration (#1277) (#1286)
Resolves #1277

### Description

- Replace deprecated 'client' field with 'environment_version' in environment spec for serverless Python models
- Add JobEnvironment SDK object conversion in api_client.py to fix 'dict object has no attribute as_dict' error
- Enable TestServerlessClusterWithEnvironment functional test (was previously skipped as 'not available in Databricks yet')
- Add unit tests for environment_version usage and user-provided environments override behavior
- Users can now specify custom environment versions via python_job_config.environments

### Checklist

- [x] I have run this code in development and it appears to resolve the stated issue
- [x] This PR includes tests, or tests are not required/relevant for this PR
- [x] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-databricks next" section.
1 parent 03c21b3 commit 4878f56

File tree

6 files changed

+144
-4
lines changed

6 files changed

+144
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
### Fixes
88

99
- Fix `hard_deletes: invalidate` incorrectly invalidating active records in snapshots (thanks @Zurbste!) ([#1281](https://github.com/databricks/dbt-databricks/issues/1281))
10+
- Fix serverless Python model environment configuration: use `environment_version` instead of deprecated `client` field. Users can now specify custom environment versions via `python_job_config.environments`. ([#1286](https://github.com/databricks/dbt-databricks/pull/1286))
1011

1112
### Under the Hood
1213

dbt/adapters/databricks/api_client.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from databricks.sdk.service.iam import AccessControlRequest
1919
from databricks.sdk.service.jobs import (
2020
JobAccessControlRequest,
21+
JobEnvironment,
2122
JobSettings,
2223
QueueSettings,
2324
Run,
@@ -530,6 +531,12 @@ def _submit_job_to_databricks(
530531
# Add any additional job settings
531532
submission_params.update(additional_job_settings)
532533

534+
# Handle environments - convert dicts to JobEnvironment objects
535+
if "environments" in submission_params:
536+
submission_params["environments"] = [
537+
JobEnvironment.from_dict(env) for env in submission_params["environments"]
538+
]
539+
533540
# Filter out parameters that the Databricks SDK doesn't expect
534541
# The SDK submit() method doesn't accept 'name' or 'run_name' in the request body
535542
filtered_params = {

dbt/adapters/databricks/python_models/python_submissions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,7 @@ def compile(self, path: str) -> PythonJobDetails:
309309
additional_job_config["environments"] = [
310310
{
311311
"environment_key": self.environment_key,
312-
"spec": {"client": "2", "dependencies": self.environment_deps},
312+
"spec": {"environment_version": "4", "dependencies": self.environment_deps},
313313
}
314314
]
315315
job_spec.update(self.cluster_spec)

tests/functional/adapter/python_model/test_python_model.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -181,8 +181,7 @@ def models(self):
181181

182182

183183
@pytest.mark.python
184-
# @pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster")
185-
@pytest.mark.skip("Not available in Databricks yet")
184+
@pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster")
186185
class TestServerlessClusterWithEnvironment(BasePythonModelTests):
187186
@pytest.fixture(scope="class")
188187
def models(self):

tests/unit/python/test_python_config.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,3 +212,45 @@ def test_python_job_config__extra_values(self):
212212
}
213213
job_config = PythonJobConfig(**config).dict()
214214
assert job_config == {"name": "name", "foo": "bar"}
215+
216+
def test_python_job_config__environments_passed_through(self):
217+
"""Test that environments field is properly passed through to dict().
218+
219+
See GitHub issue #1277: Users need to be able to specify environments
220+
to control serverless version.
221+
"""
222+
environments = [
223+
{
224+
"environment_key": "default",
225+
"spec": {"environment_version": "3"},
226+
}
227+
]
228+
config = {
229+
"name": "test_job",
230+
"environments": environments,
231+
}
232+
job_config = PythonJobConfig(**config).dict()
233+
assert job_config == {"name": "test_job", "environments": environments}
234+
235+
def test_python_job_config__environments_with_dependencies(self):
236+
"""Test environments with dependencies are properly passed through.
237+
238+
See GitHub issue #1277: Users should be able to specify full environment
239+
configuration including version and dependencies.
240+
"""
241+
environments = [
242+
{
243+
"environment_key": "custom_env",
244+
"spec": {
245+
"environment_version": "3",
246+
"dependencies": ["pandas", "numpy"],
247+
},
248+
}
249+
]
250+
config = {
251+
"name": "test_job",
252+
"environments": environments,
253+
}
254+
job_config = PythonJobConfig(**config).dict()
255+
assert job_config["environments"] == environments
256+
assert job_config["environments"][0]["spec"]["environment_version"] == "3"

tests/unit/python/test_python_job_support.py

Lines changed: 92 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,98 @@ def test_compile__nonempty_configs(
289289
"environments": [
290290
{
291291
"environment_key": environment_key,
292-
"spec": {"client": "2", "dependencies": ["requests"]},
292+
"spec": {"environment_version": "4", "dependencies": ["requests"]},
293293
}
294294
],
295295
}
296+
297+
def test_compile__uses_environment_version_not_deprecated_client(
298+
self, client, permission_builder, parsed_model, run_name, environment_key
299+
):
300+
"""Test that environment_version is used instead of deprecated 'client' field.
301+
302+
See GitHub issue #1277: The Databricks API deprecated the 'client' field
303+
in favor of 'environment_version'.
304+
"""
305+
parsed_model.config.packages = []
306+
parsed_model.config.index_url = None
307+
parsed_model.config.python_job_config.dict.return_value = {}
308+
309+
permission_builder.build_job_permissions.return_value = []
310+
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
311+
details = compiler.compile("path")
312+
313+
# Verify environment_version is used, not client
314+
env_spec = details.additional_job_config["environments"][0]["spec"]
315+
assert "environment_version" in env_spec, (
316+
"Should use 'environment_version' not deprecated 'client'"
317+
)
318+
assert "client" not in env_spec, "Should not use deprecated 'client' field"
319+
320+
def test_compile__respects_user_provided_environments(
321+
self, client, permission_builder, parsed_model, run_name
322+
):
323+
"""Test that user-provided environments in python_job_config are respected.
324+
325+
See GitHub issue #1277: Users should be able to specify their own
326+
environments configuration to control serverless version.
327+
"""
328+
parsed_model.config.packages = []
329+
parsed_model.config.index_url = None
330+
parsed_model.config.environment_key = "custom_env"
331+
parsed_model.config.environment_dependencies = [] # No auto-generated deps
332+
333+
# User provides their own environments configuration
334+
user_environments = [
335+
{
336+
"environment_key": "custom_env",
337+
"spec": {"environment_version": "3"},
338+
}
339+
]
340+
parsed_model.config.python_job_config.dict.return_value = {
341+
"environments": user_environments
342+
}
343+
344+
permission_builder.build_job_permissions.return_value = []
345+
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
346+
details = compiler.compile("path")
347+
348+
# User-provided environments should be preserved
349+
assert details.additional_job_config["environments"] == user_environments
350+
# The environment_key should still be set in job_spec
351+
assert details.job_spec["environment_key"] == "custom_env"
352+
353+
def test_compile__user_environments_override_auto_generated(
354+
self, client, permission_builder, parsed_model, run_name
355+
):
356+
"""Test that user-provided environments override auto-generated ones.
357+
358+
See GitHub issue #1277: Even when environment_dependencies are set,
359+
user-provided environments should take precedence.
360+
"""
361+
parsed_model.config.packages = []
362+
parsed_model.config.index_url = None
363+
parsed_model.config.environment_key = "my_env"
364+
parsed_model.config.environment_dependencies = ["pandas", "numpy"] # Would trigger auto-gen
365+
366+
# User provides their own environments with specific version
367+
user_environments = [
368+
{
369+
"environment_key": "my_env",
370+
"spec": {"environment_version": "3", "dependencies": ["requests"]},
371+
}
372+
]
373+
parsed_model.config.python_job_config.dict.return_value = {
374+
"environments": user_environments
375+
}
376+
377+
permission_builder.build_job_permissions.return_value = []
378+
compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {})
379+
details = compiler.compile("path")
380+
381+
# User-provided environments should override, not be merged
382+
assert details.additional_job_config["environments"] == user_environments
383+
# Should have user's dependencies, not auto-generated ones
384+
assert details.additional_job_config["environments"][0]["spec"]["dependencies"] == [
385+
"requests"
386+
]

0 commit comments

Comments
 (0)