Skip to content

Commit c55065c

Browse files
Skip large files during export-dir instead of failing (#3984)
Fixes #3691

Skip large files in workspace export-dir with warnings instead of failing

## Changes

- Modified `workspace export-dir` to handle files exceeding the 10MB size limit gracefully
- When a file exceeds the limit, log a warning and continue exporting other files
- Display a summary of all skipped files at the end of the export
- Added integration test `TestExportDirSkipsLargeFiles` to verify the behavior

## Why

Previously, when exporting a directory containing files larger than 10MB (the workspace API limit), the entire command would fail with an error. This made it impossible to export directories containing a mix of small and large files. Users would have to manually identify and exclude large files before running the export.

Now the command skips oversized files with a clear warning, allowing users to export what they can and see which files were skipped in a summary at the end.

## Tests

- Added integration test `TestExportDirSkipsLargeFiles` that:
  - Creates an 11MB file (exceeds 10MB limit) and smaller files
  - Verifies large file is skipped with appropriate warnings in stderr
  - Verifies smaller files are exported successfully
  - Verifies no empty file is created locally for skipped files
1 parent fc6eb0a commit c55065c

File tree

6 files changed

+107
-6
lines changed

6 files changed

+107
-6
lines changed

acceptance/cmd/workspace/export-dir-file-size-limit/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
>>> [CLI] workspace export-dir /test-dir [TEST_TMP_DIR]/export
3+
Exporting files from /test-dir
4+
Warning: /test-dir/file.py (skipped; file too large)
5+
6+
The following files were skipped because they exceed the maximum size limit:
7+
- /test-dir/file.py (skipped; file too large)
8+
9+
Export complete
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
$CLI workspace import /test-dir/file.py --file test_file.py --format AUTO --language PYTHON
2+
3+
mkdir -p "$TEST_TMP_DIR/export"
4+
trace $CLI workspace export-dir /test-dir "$TEST_TMP_DIR/export"
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
Local = true
2+
Cloud = false
3+
4+
[Env]
5+
MSYS_NO_PATHCONV = "1"
6+
7+
[[Server]]
8+
Pattern = "GET /api/2.0/workspace/list"
9+
Response.Body = '''
10+
{
11+
"objects": [
12+
{
13+
"path": "/test-dir/file.py",
14+
"object_type": "NOTEBOOK",
15+
"language": "PYTHON",
16+
"object_id": 123,
17+
"size": 1000,
18+
"created_at": 1609459200000,
19+
"modified_at": 1609459200000
20+
}
21+
]
22+
}
23+
'''
24+
25+
[[Server]]
26+
Pattern = "GET /api/2.0/workspace/export"
27+
Response.StatusCode = 400
28+
Response.Body = '''
29+
{
30+
"error_code": "MAX_NOTEBOOK_SIZE_EXCEEDED",
31+
"message": "Notebook size exceeds maximum allowed size"
32+
}
33+
'''
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
print("hello")

cmd/workspace/workspace/export_dir.go

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package workspace
22

33
import (
44
"context"
5+
"errors"
56
"io"
67
"io/fs"
8+
"net/http"
79
"os"
810
"path"
911
"path/filepath"
@@ -13,6 +15,7 @@ import (
1315
"github.com/databricks/cli/libs/cmdio"
1416
"github.com/databricks/cli/libs/filer"
1517
"github.com/databricks/cli/libs/notebook"
18+
"github.com/databricks/databricks-sdk-go/apierr"
1619
"github.com/databricks/databricks-sdk-go/service/workspace"
1720
"github.com/spf13/cobra"
1821
)
@@ -21,11 +24,36 @@ type exportDirOptions struct {
2124
sourceDir string
2225
targetDir string
2326
overwrite bool
27+
warnings []string
28+
}
29+
30+
// isFileSizeError checks if the error is due to file size limits.
31+
func isFileSizeError(err error) bool {
32+
var aerr *apierr.APIError
33+
if !errors.As(err, &aerr) || aerr.StatusCode != http.StatusBadRequest {
34+
return false
35+
}
36+
37+
// Check ErrorCode field
38+
if aerr.ErrorCode == "MAX_NOTEBOOK_SIZE_EXCEEDED" || aerr.ErrorCode == "MAX_READ_SIZE_EXCEEDED" {
39+
return true
40+
}
41+
42+
// Check ErrorInfo.Reason field
43+
details := aerr.ErrorDetails()
44+
if details.ErrorInfo != nil {
45+
reason := details.ErrorInfo.Reason
46+
if reason == "MAX_NOTEBOOK_SIZE_EXCEEDED" || reason == "MAX_READ_SIZE_EXCEEDED" {
47+
return true
48+
}
49+
}
50+
51+
return false
2452
}
2553

2654
// The callback function exports the file specified at relPath. This function is
2755
// meant to be used in conjunction with fs.WalkDir
28-
func (opts exportDirOptions) callback(ctx context.Context, workspaceFiler filer.Filer) func(string, fs.DirEntry, error) error {
56+
func (opts *exportDirOptions) callback(ctx context.Context, workspaceFiler filer.Filer) func(string, fs.DirEntry, error) error {
2957
sourceDir := opts.sourceDir
3058
targetDir := opts.targetDir
3159
overwrite := opts.overwrite
@@ -59,18 +87,27 @@ func (opts exportDirOptions) callback(ctx context.Context, workspaceFiler filer.
5987
return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(relPath, targetPath), "", "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n")
6088
}
6189

62-
// create the file
63-
f, err := os.Create(targetPath)
90+
// Write content to the local file
91+
r, err := workspaceFiler.Read(ctx, relPath)
6492
if err != nil {
93+
// Check if this is a file size limit error
94+
if isFileSizeError(err) {
95+
warning := sourcePath + " (skipped; file too large)"
96+
cmdio.LogString(ctx, "Warning: "+warning)
97+
opts.warnings = append(opts.warnings, warning)
98+
return nil
99+
}
65100
return err
66101
}
67-
defer f.Close()
102+
defer r.Close()
68103

69-
// Write content to the local file
70-
r, err := workspaceFiler.Read(ctx, relPath)
104+
// create the file
105+
f, err := os.Create(targetPath)
71106
if err != nil {
72107
return err
73108
}
109+
defer f.Close()
110+
74111
_, err = io.Copy(f, r)
75112
if err != nil {
76113
return err
@@ -103,6 +140,7 @@ func newExportDir() *cobra.Command {
103140
w := cmdctx.WorkspaceClient(ctx)
104141
opts.sourceDir = args[0]
105142
opts.targetDir = args[1]
143+
opts.warnings = []string{}
106144

107145
// Initialize a filer and a file system on the source directory
108146
workspaceFiler, err := filer.NewWorkspaceFilesClient(w, opts.sourceDir)
@@ -120,6 +158,17 @@ func newExportDir() *cobra.Command {
120158
if err != nil {
121159
return err
122160
}
161+
162+
// Print all warnings at the end if any were collected
163+
if len(opts.warnings) > 0 {
164+
cmdio.LogString(ctx, "")
165+
cmdio.LogString(ctx, "The following files were skipped because they exceed the maximum size limit:")
166+
for _, warning := range opts.warnings {
167+
cmdio.LogString(ctx, " - "+warning)
168+
}
169+
cmdio.LogString(ctx, "")
170+
}
171+
123172
return cmdio.RenderWithTemplate(ctx, newExportCompletedEvent(opts.targetDir), "", "Export complete\n")
124173
}
125174

0 commit comments

Comments
 (0)