diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b20896c7..b1f497518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Fixes - Fix `hard_deletes: invalidate` incorrectly invalidating active records in snapshots (thanks @Zurbste!) ([#1281](https://github.com/databricks/dbt-databricks/issues/1281)) +- Fix serverless Python model environment configuration: use `environment_version` instead of deprecated `client` field. Users can now specify custom environment versions via `python_job_config.environments`. ([#1286](https://github.com/databricks/dbt-databricks/pull/1286)) ### Under the Hood diff --git a/dbt/adapters/databricks/api_client.py b/dbt/adapters/databricks/api_client.py index b7c9ba638..1ccdcba6e 100644 --- a/dbt/adapters/databricks/api_client.py +++ b/dbt/adapters/databricks/api_client.py @@ -18,6 +18,7 @@ from databricks.sdk.service.iam import AccessControlRequest from databricks.sdk.service.jobs import ( JobAccessControlRequest, + JobEnvironment, JobSettings, QueueSettings, Run, @@ -530,6 +531,12 @@ def _submit_job_to_databricks( # Add any additional job settings submission_params.update(additional_job_settings) + # Handle environments - convert dicts to JobEnvironment objects + if "environments" in submission_params: + submission_params["environments"] = [ + JobEnvironment.from_dict(env) for env in submission_params["environments"] + ] + # Filter out parameters that the Databricks SDK doesn't expect # The SDK submit() method doesn't accept 'name' or 'run_name' in the request body filtered_params = { diff --git a/dbt/adapters/databricks/python_models/python_submissions.py b/dbt/adapters/databricks/python_models/python_submissions.py index b4c502820..23c2a27c1 100644 --- a/dbt/adapters/databricks/python_models/python_submissions.py +++ b/dbt/adapters/databricks/python_models/python_submissions.py @@ -309,7 +309,7 @@ def compile(self, path: str) -> PythonJobDetails: additional_job_config["environments"] = [ { "environment_key": self.environment_key, - "spec": {"client": "2", "dependencies": self.environment_deps}, + "spec": {"environment_version": "4", "dependencies": self.environment_deps}, } ] job_spec.update(self.cluster_spec) diff --git a/tests/functional/adapter/python_model/test_python_model.py b/tests/functional/adapter/python_model/test_python_model.py index 72c2d4c91..acc479744 100644 --- a/tests/functional/adapter/python_model/test_python_model.py +++ b/tests/functional/adapter/python_model/test_python_model.py @@ -181,8 +181,7 @@ def models(self): @pytest.mark.python -# @pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster") -@pytest.mark.skip("Not available in Databricks yet") +@pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster") class TestServerlessClusterWithEnvironment(BasePythonModelTests): @pytest.fixture(scope="class") def models(self): diff --git a/tests/unit/python/test_python_config.py b/tests/unit/python/test_python_config.py index 7c9923c1b..abda6587a 100644 --- a/tests/unit/python/test_python_config.py +++ b/tests/unit/python/test_python_config.py @@ -212,3 +212,45 @@ def test_python_job_config__extra_values(self): } job_config = PythonJobConfig(**config).dict() assert job_config == {"name": "name", "foo": "bar"} + + def test_python_job_config__environments_passed_through(self): + """Test that environments field is properly passed through to dict(). + + See GitHub issue #1277: Users need to be able to specify environments + to control serverless version. + """ + environments = [ + { + "environment_key": "default", + "spec": {"environment_version": "3"}, + } + ] + config = { + "name": "test_job", + "environments": environments, + } + job_config = PythonJobConfig(**config).dict() + assert job_config == {"name": "test_job", "environments": environments} + + def test_python_job_config__environments_with_dependencies(self): + """Test environments with dependencies are properly passed through. + + See GitHub issue #1277: Users should be able to specify full environment + configuration including version and dependencies. + """ + environments = [ + { + "environment_key": "custom_env", + "spec": { + "environment_version": "3", + "dependencies": ["pandas", "numpy"], + }, + } + ] + config = { + "name": "test_job", + "environments": environments, + } + job_config = PythonJobConfig(**config).dict() + assert job_config["environments"] == environments + assert job_config["environments"][0]["spec"]["environment_version"] == "3" diff --git a/tests/unit/python/test_python_job_support.py b/tests/unit/python/test_python_job_support.py index f2009afc4..50536589a 100644 --- a/tests/unit/python/test_python_job_support.py +++ b/tests/unit/python/test_python_job_support.py @@ -289,7 +289,98 @@ def test_compile__nonempty_configs( "environments": [ { "environment_key": environment_key, - "spec": {"client": "2", "dependencies": ["requests"]}, + "spec": {"environment_version": "4", "dependencies": ["requests"]}, } ], } + + def test_compile__uses_environment_version_not_deprecated_client( + self, client, permission_builder, parsed_model, run_name, environment_key + ): + """Test that environment_version is used instead of deprecated 'client' field. + + See GitHub issue #1277: The Databricks API deprecated the 'client' field + in favor of 'environment_version'. + """ + parsed_model.config.packages = [] + parsed_model.config.index_url = None + parsed_model.config.python_job_config.dict.return_value = {} + + permission_builder.build_job_permissions.return_value = [] + compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {}) + details = compiler.compile("path") + + # Verify environment_version is used, not client + env_spec = details.additional_job_config["environments"][0]["spec"] + assert "environment_version" in env_spec, ( + "Should use 'environment_version' not deprecated 'client'" + ) + assert "client" not in env_spec, "Should not use deprecated 'client' field" + + def test_compile__respects_user_provided_environments( + self, client, permission_builder, parsed_model, run_name + ): + """Test that user-provided environments in python_job_config are respected. + + See GitHub issue #1277: Users should be able to specify their own + environments configuration to control serverless version. + """ + parsed_model.config.packages = [] + parsed_model.config.index_url = None + parsed_model.config.environment_key = "custom_env" + parsed_model.config.environment_dependencies = [] # No auto-generated deps + + # User provides their own environments configuration + user_environments = [ + { + "environment_key": "custom_env", + "spec": {"environment_version": "3"}, + } + ] + parsed_model.config.python_job_config.dict.return_value = { + "environments": user_environments + } + + permission_builder.build_job_permissions.return_value = [] + compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {}) + details = compiler.compile("path") + + # User-provided environments should be preserved + assert details.additional_job_config["environments"] == user_environments + # The environment_key should still be set in job_spec + assert details.job_spec["environment_key"] == "custom_env" + + def test_compile__user_environments_override_auto_generated( + self, client, permission_builder, parsed_model, run_name + ): + """Test that user-provided environments override auto-generated ones. + + See GitHub issue #1277: Even when environment_dependencies are set, + user-provided environments should take precedence. + """ + parsed_model.config.packages = [] + parsed_model.config.index_url = None + parsed_model.config.environment_key = "my_env" + parsed_model.config.environment_dependencies = ["pandas", "numpy"] # Would trigger auto-gen + + # User provides their own environments with specific version + user_environments = [ + { + "environment_key": "my_env", + "spec": {"environment_version": "3", "dependencies": ["requests"]}, + } + ] + parsed_model.config.python_job_config.dict.return_value = { + "environments": user_environments + } + + permission_builder.build_job_permissions.return_value = [] + compiler = PythonJobConfigCompiler(client, permission_builder, parsed_model, {}) + details = compiler.compile("path") + + # User-provided environments should override, not be merged + assert details.additional_job_config["environments"] == user_environments + # Should have user's dependencies, not auto-generated ones + assert details.additional_job_config["environments"][0]["spec"]["dependencies"] == [ + "requests" + ]