Skip to content

Commit 0daa002

Browse files
authored
Make a notebook wrapper for Python wheel tasks optional (#797)
## Changes Instead of always using notebook wrapper for Python wheel tasks, let's make this an opt-in option. Now by default Python wheel tasks will be deployed as is to Databricks platform. If notebook wrapper required (DBR < 13.1 or other configuration differences), users can provide a following experimental setting ``` experimental: python_wheel_wrapper: true ``` Fixes #783, databricks/databricks-asset-bundles-dais2023#8 ## Tests Added unit tests. Integration tests passed for both cases ``` helpers.go:163: [databricks stdout]: Hello from my func helpers.go:163: [databricks stdout]: Got arguments: helpers.go:163: [databricks stdout]: ['my_test_code', 'one', 'two'] ... Bundle remote directory is ***/.bundle/ac05d5e8-ed4b-4e34-b3f2-afa73f62b021 Deleted snapshot file at /var/folders/nt/xjv68qzs45319w4k36dhpylc0000gp/T/TestAccPythonWheelTaskDeployAndRunWithWrapper3733431114/001/.databricks/bundle/default/sync-snapshots/cac1e02f3941a97b.json Successfully deleted files! --- PASS: TestAccPythonWheelTaskDeployAndRunWithWrapper (214.18s) PASS coverage: 93.5% of statements in ./... ok github.com/databricks/cli/internal/bundle 214.495s coverage: 93.5% of statements in ./... ``` ``` helpers.go:163: [databricks stdout]: Hello from my func helpers.go:163: [databricks stdout]: Got arguments: helpers.go:163: [databricks stdout]: ['my_test_code', 'one', 'two'] ... Bundle remote directory is ***/.bundle/0ef67aaf-5960-4049-bf1d-dc9e29157421 Deleted snapshot file at /var/folders/nt/xjv68qzs45319w4k36dhpylc0000gp/T/TestAccPythonWheelTaskDeployAndRunWithoutWrapper2340216760/001/.databricks/bundle/default/sync-snapshots/edf0b322cee93b13.json Successfully deleted files! --- PASS: TestAccPythonWheelTaskDeployAndRunWithoutWrapper (192.36s) PASS coverage: 93.5% of statements in ./... ok github.com/databricks/cli/internal/bundle 195.130s coverage: 93.5% of statements in ./... ```
1 parent e1b5912 commit 0daa002

File tree

9 files changed

+226
-23
lines changed

9 files changed

+226
-23
lines changed

bundle/config/experimental.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ package config
22

33
type Experimental struct {
44
Scripts map[ScriptHook]Command `json:"scripts,omitempty"`
5+
6+
// By default Python wheel tasks deployed as is to Databricks platform.
7+
// If notebook wrapper required (for example, used in DBR < 13.1 or other configuration differences), users can provide a following experimental setting
8+
// experimental:
9+
// python_wheel_wrapper: true
10+
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
11+
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
12+
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
513
}
614

715
type Command string

bundle/config/mutator/if.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package mutator
2+
3+
import (
4+
"context"
5+
6+
"github.com/databricks/cli/bundle"
7+
)
8+
9+
type ifMutator struct {
10+
condition func(*bundle.Bundle) bool
11+
onTrueMutator bundle.Mutator
12+
onFalseMutator bundle.Mutator
13+
}
14+
15+
func If(
16+
condition func(*bundle.Bundle) bool,
17+
onTrueMutator bundle.Mutator,
18+
onFalseMutator bundle.Mutator,
19+
) bundle.Mutator {
20+
return &ifMutator{
21+
condition, onTrueMutator, onFalseMutator,
22+
}
23+
}
24+
25+
func (m *ifMutator) Apply(ctx context.Context, b *bundle.Bundle) error {
26+
if m.condition(b) {
27+
return bundle.Apply(ctx, b, m.onTrueMutator)
28+
} else {
29+
return bundle.Apply(ctx, b, m.onFalseMutator)
30+
}
31+
}
32+
33+
func (m *ifMutator) Name() string {
34+
return "If"
35+
}

bundle/config/mutator/noop.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package mutator
2+
3+
import (
4+
"context"
5+
6+
"github.com/databricks/cli/bundle"
7+
)
8+
9+
type noop struct{}
10+
11+
func (*noop) Apply(context.Context, *bundle.Bundle) error {
12+
return nil
13+
}
14+
15+
func (*noop) Name() string {
16+
return "NoOp"
17+
}
18+
19+
func NoOp() bundle.Mutator {
20+
return &noop{}
21+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package python
2+
3+
import (
4+
"context"
5+
"path"
6+
"path/filepath"
7+
"testing"
8+
9+
"github.com/databricks/cli/bundle"
10+
"github.com/databricks/cli/bundle/config"
11+
"github.com/databricks/cli/bundle/config/resources"
12+
"github.com/databricks/databricks-sdk-go/service/compute"
13+
"github.com/databricks/databricks-sdk-go/service/jobs"
14+
"github.com/stretchr/testify/require"
15+
)
16+
17+
func TestNoTransformByDefault(t *testing.T) {
18+
tmpDir := t.TempDir()
19+
20+
b := &bundle.Bundle{
21+
Config: config.Root{
22+
Path: tmpDir,
23+
Bundle: config.Bundle{
24+
Target: "development",
25+
},
26+
Resources: config.Resources{
27+
Jobs: map[string]*resources.Job{
28+
"job1": {
29+
JobSettings: &jobs.JobSettings{
30+
Tasks: []jobs.Task{
31+
{
32+
TaskKey: "key1",
33+
PythonWheelTask: &jobs.PythonWheelTask{
34+
PackageName: "test_package",
35+
EntryPoint: "main",
36+
},
37+
Libraries: []compute.Library{
38+
{Whl: "/Workspace/Users/[email protected]/bundle/dist/test.whl"},
39+
},
40+
},
41+
},
42+
},
43+
},
44+
},
45+
},
46+
},
47+
}
48+
49+
trampoline := TransformWheelTask()
50+
err := bundle.Apply(context.Background(), b, trampoline)
51+
require.NoError(t, err)
52+
53+
task := b.Config.Resources.Jobs["job1"].Tasks[0]
54+
require.NotNil(t, task.PythonWheelTask)
55+
require.Equal(t, "test_package", task.PythonWheelTask.PackageName)
56+
require.Equal(t, "main", task.PythonWheelTask.EntryPoint)
57+
require.Equal(t, "/Workspace/Users/[email protected]/bundle/dist/test.whl", task.Libraries[0].Whl)
58+
59+
require.Nil(t, task.NotebookTask)
60+
}
61+
62+
func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) {
63+
tmpDir := t.TempDir()
64+
65+
b := &bundle.Bundle{
66+
Config: config.Root{
67+
Path: tmpDir,
68+
Bundle: config.Bundle{
69+
Target: "development",
70+
},
71+
Resources: config.Resources{
72+
Jobs: map[string]*resources.Job{
73+
"job1": {
74+
JobSettings: &jobs.JobSettings{
75+
Tasks: []jobs.Task{
76+
{
77+
TaskKey: "key1",
78+
PythonWheelTask: &jobs.PythonWheelTask{
79+
PackageName: "test_package",
80+
EntryPoint: "main",
81+
},
82+
Libraries: []compute.Library{
83+
{Whl: "/Workspace/Users/[email protected]/bundle/dist/test.whl"},
84+
},
85+
},
86+
},
87+
},
88+
},
89+
},
90+
},
91+
Experimental: &config.Experimental{
92+
PythonWheelWrapper: true,
93+
},
94+
},
95+
}
96+
97+
trampoline := TransformWheelTask()
98+
err := bundle.Apply(context.Background(), b, trampoline)
99+
require.NoError(t, err)
100+
101+
task := b.Config.Resources.Jobs["job1"].Tasks[0]
102+
require.Nil(t, task.PythonWheelTask)
103+
require.NotNil(t, task.NotebookTask)
104+
105+
dir, err := b.InternalDir(context.Background())
106+
require.NoError(t, err)
107+
108+
internalDirRel, err := filepath.Rel(b.Config.Path, dir)
109+
require.NoError(t, err)
110+
111+
require.Equal(t, path.Join(filepath.ToSlash(internalDirRel), "notebook_job1_key1"), task.NotebookTask.NotebookPath)
112+
113+
require.Empty(t, task.Libraries)
114+
}

bundle/python/transform.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ dbutils.notebook.exit(s)
4949
// which installs uploaded wheels using %pip and then calling corresponding
5050
// entry point.
5151
func TransformWheelTask() bundle.Mutator {
52-
return mutator.NewTrampoline(
53-
"python_wheel",
54-
&pythonTrampoline{},
55-
NOTEBOOK_TEMPLATE,
52+
return mutator.If(
53+
func(b *bundle.Bundle) bool {
54+
return b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper
55+
},
56+
mutator.NewTrampoline(
57+
"python_wheel",
58+
&pythonTrampoline{},
59+
NOTEBOOK_TEMPLATE,
60+
),
61+
mutator.NoOp(),
5662
)
5763
}
5864

@@ -113,7 +119,7 @@ func (t *pythonTrampoline) generateParameters(task *jobs.PythonWheelTask) (strin
113119
if task.Parameters != nil && task.NamedParameters != nil {
114120
return "", fmt.Errorf("not allowed to pass both paramaters and named_parameters")
115121
}
116-
params := append([]string{"python"}, task.Parameters...)
122+
params := append([]string{task.PackageName}, task.Parameters...)
117123
for k, v := range task.NamedParameters {
118124
params = append(params, fmt.Sprintf("%s=%s", k, v))
119125
}

bundle/python/transform_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,26 +25,26 @@ type testCaseNamed struct {
2525
}
2626

2727
var paramsTestCases []testCase = []testCase{
28-
{[]string{}, `"python"`},
29-
{[]string{"a"}, `"python", "a"`},
30-
{[]string{"a", "b"}, `"python", "a", "b"`},
31-
{[]string{"123!@#$%^&*()-="}, `"python", "123!@#$%^&*()-="`},
32-
{[]string{`{"a": 1}`}, `"python", "{\"a\": 1}"`},
28+
{[]string{}, `"my_test_code"`},
29+
{[]string{"a"}, `"my_test_code", "a"`},
30+
{[]string{"a", "b"}, `"my_test_code", "a", "b"`},
31+
{[]string{"123!@#$%^&*()-="}, `"my_test_code", "123!@#$%^&*()-="`},
32+
{[]string{`{"a": 1}`}, `"my_test_code", "{\"a\": 1}"`},
3333
}
3434

3535
var paramsTestCasesNamed []testCaseNamed = []testCaseNamed{
36-
{map[string]string{}, `"python"`},
37-
{map[string]string{"a": "1"}, `"python", "a=1"`},
38-
{map[string]string{"a": "'1'"}, `"python", "a='1'"`},
39-
{map[string]string{"a": `"1"`}, `"python", "a=\"1\""`},
40-
{map[string]string{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`},
41-
{map[string]string{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`},
36+
{map[string]string{}, `"my_test_code"`},
37+
{map[string]string{"a": "1"}, `"my_test_code", "a=1"`},
38+
{map[string]string{"a": "'1'"}, `"my_test_code", "a='1'"`},
39+
{map[string]string{"a": `"1"`}, `"my_test_code", "a=\"1\""`},
40+
{map[string]string{"a": "1", "b": "2"}, `"my_test_code", "a=1", "b=2"`},
41+
{map[string]string{"data": `{"a": 1}`}, `"my_test_code", "data={\"a\": 1}"`},
4242
}
4343

4444
func TestGenerateParameters(t *testing.T) {
4545
trampoline := pythonTrampoline{}
4646
for _, c := range paramsTestCases {
47-
task := &jobs.PythonWheelTask{Parameters: c.Actual}
47+
task := &jobs.PythonWheelTask{PackageName: "my_test_code", Parameters: c.Actual}
4848
result, err := trampoline.generateParameters(task)
4949
require.NoError(t, err)
5050
require.Equal(t, c.Expected, result)
@@ -54,7 +54,7 @@ func TestGenerateParameters(t *testing.T) {
5454
func TestGenerateNamedParameters(t *testing.T) {
5555
trampoline := pythonTrampoline{}
5656
for _, c := range paramsTestCasesNamed {
57-
task := &jobs.PythonWheelTask{NamedParameters: c.Actual}
57+
task := &jobs.PythonWheelTask{PackageName: "my_test_code", NamedParameters: c.Actual}
5858
result, err := trampoline.generateParameters(task)
5959
require.NoError(t, err)
6060

internal/bundle/bundles/python_wheel_task/databricks_template_schema.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
"unique_id": {
1717
"type": "string",
1818
"description": "Unique ID for job name"
19+
},
20+
"python_wheel_wrapper": {
21+
"type": "boolean",
22+
"description": "Whether or not to enable python wheel wrapper"
1923
}
2024
}
2125
}

internal/bundle/bundles/python_wheel_task/template/databricks.yml.tmpl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ bundle:
44
workspace:
55
root_path: "~/.bundle/{{.unique_id}}"
66

7+
{{if .python_wheel_wrapper}}
8+
experimental:
9+
python_wheel_wrapper: true
10+
{{end}}
11+
712
resources:
813
jobs:
914
some_other_job:
@@ -14,6 +19,7 @@ resources:
1419
num_workers: 1
1520
spark_version: "{{.spark_version}}"
1621
node_type_id: "{{.node_type_id}}"
22+
data_security_mode: USER_ISOLATION
1723
python_wheel_task:
1824
package_name: my_test_code
1925
entry_point: run

internal/bundle/python_wheel_test.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"github.com/stretchr/testify/require"
99
)
1010

11-
func TestAccPythonWheelTaskDeployAndRun(t *testing.T) {
11+
func runPythonWheelTest(t *testing.T, pythonWheelWrapper bool) {
1212
env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
1313
t.Log(env)
1414

@@ -22,9 +22,10 @@ func TestAccPythonWheelTaskDeployAndRun(t *testing.T) {
2222
}
2323

2424
bundleRoot, err := initTestTemplate(t, "python_wheel_task", map[string]any{
25-
"node_type_id": nodeTypeId,
26-
"unique_id": uuid.New().String(),
27-
"spark_version": "13.2.x-snapshot-scala2.12",
25+
"node_type_id": nodeTypeId,
26+
"unique_id": uuid.New().String(),
27+
"spark_version": "13.2.x-snapshot-scala2.12",
28+
"python_wheel_wrapper": pythonWheelWrapper,
2829
})
2930
require.NoError(t, err)
3031

@@ -39,5 +40,13 @@ func TestAccPythonWheelTaskDeployAndRun(t *testing.T) {
3940
require.NoError(t, err)
4041
require.Contains(t, out, "Hello from my func")
4142
require.Contains(t, out, "Got arguments:")
42-
require.Contains(t, out, "['python', 'one', 'two']")
43+
require.Contains(t, out, "['my_test_code', 'one', 'two']")
44+
}
45+
46+
func TestAccPythonWheelTaskDeployAndRunWithoutWrapper(t *testing.T) {
47+
runPythonWheelTest(t, false)
48+
}
49+
50+
func TestAccPythonWheelTaskDeployAndRunWithWrapper(t *testing.T) {
51+
runPythonWheelTest(t, true)
4352
}

0 commit comments

Comments
 (0)