Skip to content

Commit 7beb0fb

Browse files
Add validation mutator for volume artifact_path (#2050)
## Changes This PR: 1. Incrementally improves the error messages shown to the user when the volume referenced in `workspace.artifact_path` does not exist. 2. Performs this validation in both `bundle validate` and `bundle deploy`; previously it ran only on deployments. 3. Runs "fast" validations on `bundle deploy`, which were previously run only on `bundle validate`. ## Tests Unit tests and manual testing. Existing integration tests also provide coverage (`TestUploadArtifactToVolumeNotYetDeployed`, `TestUploadArtifactFileToVolumeThatDoesNotExist`). Examples: ``` .venv➜ bundle-playground git:(master) ✗ cli bundle validate Error: cannot access volume capital.whatever.my_volume: User does not have READ VOLUME on Volume 'capital.whatever.my_volume'. at workspace.artifact_path in databricks.yml:7:18 ``` and ``` .venv➜ bundle-playground git:(master) ✗ cli bundle validate Error: volume capital.whatever.foobar does not exist at workspace.artifact_path resources.volumes.foo in databricks.yml:7:18 databricks.yml:12:7 You are using a volume in your artifact_path that is managed by this bundle but which has not been deployed yet. Please first deploy the volume using 'bundle deploy' and then switch over to using it in the artifact_path. ```
1 parent 509f5ab commit 7beb0fb

File tree

11 files changed

+444
-398
lines changed

11 files changed

+444
-398
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package validate
2+
3+
import (
4+
"context"
5+
6+
"github.com/databricks/cli/bundle"
7+
"github.com/databricks/cli/libs/diag"
8+
)
9+
10+
// fastValidateReadonly runs a subset of fast validation checks. This is a subset of the full
11+
// suite of validation mutators that satisfy ANY ONE of the following criteria:
12+
//
13+
// 1. No file i/o or network requests are made in the mutator.
14+
// 2. The validation is blocking for bundle deployments.
15+
//
16+
// The full suite of validation mutators is available in the [Validate] mutator.
17+
type fastValidateReadonly struct{}
18+
19+
func FastValidateReadonly() bundle.ReadOnlyMutator {
20+
return &fastValidateReadonly{}
21+
}
22+
23+
func (f *fastValidateReadonly) Name() string {
24+
return "fast_validate(readonly)"
25+
}
26+
27+
func (f *fastValidateReadonly) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
28+
return bundle.ApplyReadOnly(ctx, rb, bundle.Parallel(
29+
// Fast mutators with only in-memory checks
30+
JobClusterKeyDefined(),
31+
JobTaskClusterSpec(),
32+
SingleNodeCluster(),
33+
34+
// Blocking mutators. Deployments will fail if these checks fail.
35+
ValidateArtifactPath(),
36+
))
37+
}
38+
39+
// fastValidate is a bundle.Mutator whose Apply delegates to
// FastValidateReadonly on a read-only view of the bundle.
type fastValidate struct{}
40+
41+
func FastValidate() bundle.Mutator {
42+
return &fastValidate{}
43+
}
44+
45+
func (f *fastValidate) Name() string {
46+
return "fast_validate"
47+
}
48+
49+
func (f *fastValidate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
50+
return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), FastValidateReadonly())
51+
}

bundle/config/validate/validate.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@ func (l location) Path() dyn.Path {
3030
// Apply implements bundle.Mutator. It wraps the bundle with bundle.ReadOnly and
// applies the mutators listed below, grouped with bundle.Parallel, returning
// the resulting diagnostics.
3131
func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
3232
return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel(
33-
JobClusterKeyDefined(),
33+
FastValidateReadonly(),
34+
35+
// Slow mutators that require network or file i/o. These are only
36+
// run in the `bundle validate` command.
3437
FilesToSync(),
35-
ValidateSyncPatterns(),
36-
JobTaskClusterSpec(),
3738
ValidateFolderPermissions(),
38-
SingleNodeCluster(),
39+
ValidateSyncPatterns(),
3940
))
4041
}
4142

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package validate
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"slices"
8+
"strings"
9+
10+
"github.com/databricks/cli/bundle"
11+
"github.com/databricks/cli/bundle/config"
12+
"github.com/databricks/cli/bundle/libraries"
13+
"github.com/databricks/cli/libs/diag"
14+
"github.com/databricks/cli/libs/dyn"
15+
"github.com/databricks/cli/libs/dyn/dynvar"
16+
"github.com/databricks/databricks-sdk-go/apierr"
17+
)
18+
19+
// validateArtifactPath is a read-only mutator that checks that a UC Volumes
// path configured in workspace.artifact_path refers to a volume that can be read.
type validateArtifactPath struct{}
20+
21+
func ValidateArtifactPath() bundle.ReadOnlyMutator {
22+
return &validateArtifactPath{}
23+
}
24+
25+
func (v *validateArtifactPath) Name() string {
26+
return "validate:artifact_paths"
27+
}
28+
29+
func extractVolumeFromPath(artifactPath string) (string, string, string, error) {
30+
if !libraries.IsVolumesPath(artifactPath) {
31+
return "", "", "", fmt.Errorf("expected artifact_path to start with /Volumes/, got %s", artifactPath)
32+
}
33+
34+
parts := strings.Split(artifactPath, "/")
35+
volumeFormatErr := fmt.Errorf("expected UC volume path to be in the format /Volumes/<catalog>/<schema>/<volume>/..., got %s", artifactPath)
36+
37+
// Incorrect format.
38+
if len(parts) < 5 {
39+
return "", "", "", volumeFormatErr
40+
}
41+
42+
catalogName := parts[2]
43+
schemaName := parts[3]
44+
volumeName := parts[4]
45+
46+
// Incorrect format.
47+
if catalogName == "" || schemaName == "" || volumeName == "" {
48+
return "", "", "", volumeFormatErr
49+
}
50+
51+
return catalogName, schemaName, volumeName, nil
52+
}
53+
54+
func findVolumeInBundle(r config.Root, catalogName, schemaName, volumeName string) (dyn.Path, []dyn.Location, bool) {
55+
volumes := r.Resources.Volumes
56+
for k, v := range volumes {
57+
if v.CatalogName != catalogName || v.Name != volumeName {
58+
continue
59+
}
60+
// UC schemas can be defined in the bundle itself, and thus might be interpolated
61+
// at runtime via the ${resources.schemas.<name>} syntax. Thus we match the volume
62+
// definition if the schema name is the same as the one in the bundle, or if the
63+
// schema name is interpolated.
64+
// We only have to check for ${resources.schemas...} references because any
65+
// other valid reference (like ${var.foo}) would have been interpolated by this point.
66+
p, ok := dynvar.PureReferenceToPath(v.SchemaName)
67+
isSchemaDefinedInBundle := ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")})
68+
if v.SchemaName != schemaName && !isSchemaDefinedInBundle {
69+
continue
70+
}
71+
pathString := fmt.Sprintf("resources.volumes.%s", k)
72+
return dyn.MustPathFromString(pathString), r.GetLocations(pathString), true
73+
}
74+
return nil, nil, false
75+
}
76+
77+
func (v *validateArtifactPath) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
78+
// We only validate UC Volumes paths right now.
79+
if !libraries.IsVolumesPath(rb.Config().Workspace.ArtifactPath) {
80+
return nil
81+
}
82+
83+
wrapErrorMsg := func(s string) diag.Diagnostics {
84+
return diag.Diagnostics{
85+
{
86+
Summary: s,
87+
Severity: diag.Error,
88+
Locations: rb.Config().GetLocations("workspace.artifact_path"),
89+
Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")},
90+
},
91+
}
92+
}
93+
94+
catalogName, schemaName, volumeName, err := extractVolumeFromPath(rb.Config().Workspace.ArtifactPath)
95+
if err != nil {
96+
return wrapErrorMsg(err.Error())
97+
}
98+
volumeFullName := fmt.Sprintf("%s.%s.%s", catalogName, schemaName, volumeName)
99+
w := rb.WorkspaceClient()
100+
_, err = w.Volumes.ReadByName(ctx, volumeFullName)
101+
102+
if errors.Is(err, apierr.ErrPermissionDenied) {
103+
return wrapErrorMsg(fmt.Sprintf("cannot access volume %s: %s", volumeFullName, err))
104+
}
105+
if errors.Is(err, apierr.ErrNotFound) {
106+
path, locations, ok := findVolumeInBundle(rb.Config(), catalogName, schemaName, volumeName)
107+
if !ok {
108+
return wrapErrorMsg(fmt.Sprintf("volume %s does not exist", volumeFullName))
109+
}
110+
111+
// If the volume is defined in the bundle, provide a more helpful error diagnostic,
112+
// with more details and location information.
113+
return diag.Diagnostics{{
114+
Summary: fmt.Sprintf("volume %s does not exist", volumeFullName),
115+
Severity: diag.Error,
116+
Detail: `You are using a volume in your artifact_path that is managed by
117+
this bundle but which has not been deployed yet. Please first deploy
118+
the volume using 'bundle deploy' and then switch over to using it in
119+
the artifact_path.`,
120+
Locations: slices.Concat(rb.Config().GetLocations("workspace.artifact_path"), locations),
121+
Paths: append([]dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, path),
122+
}}
123+
124+
}
125+
if err != nil {
126+
return wrapErrorMsg(fmt.Sprintf("cannot read volume %s: %s", volumeFullName, err))
127+
}
128+
return nil
129+
}

0 commit comments

Comments
 (0)