Skip to content

Commit 3f1cea1

Browse files
Fix FilesExt upload failing when content size is zero
1 parent 49eb17b commit 3f1cea1

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

NEXT_CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
### Bug Fixes
88

9+
- Fixed `FilesExt.upload` and `FilesExt.upload_from` failing when the source content is empty and `use_parallel=True`.
10+
911
### Documentation
1012

1113
### Internal Changes

databricks/sdk/mixins/files.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
from tempfile import mkstemp
2323
from threading import Event, Thread
2424
from types import TracebackType
25-
from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Callable, Generator,
26-
Iterable, Optional, Type, Union)
25+
from typing import TYPE_CHECKING, AnyStr, BinaryIO, Callable, Generator, Iterable, Optional, Type, Union
2726
from urllib import parse
2827

2928
import requests
@@ -39,8 +38,7 @@
3938
from ..service import files
4039
from ..service._internal import _escape_multi_segment_path_parameter
4140
from ..service.files import DownloadResponse
42-
from .files_utils import (CreateDownloadUrlResponse, _ConcatenatedInputStream,
43-
_PresignedUrlDistributor)
41+
from .files_utils import CreateDownloadUrlResponse, _ConcatenatedInputStream, _PresignedUrlDistributor
4442

4543
if TYPE_CHECKING:
4644
from _typeshed import Self
@@ -1134,7 +1132,9 @@ def upload(
11341132
f"Upload context: part_size={ctx.part_size}, batch_size={ctx.batch_size}, content_length={ctx.content_length}"
11351133
)
11361134

1137-
if ctx.use_parallel:
1135+
if ctx.use_parallel and (
1136+
ctx.content_length is None or ctx.content_length >= self._config.files_ext_multipart_upload_min_stream_size
1137+
):
11381138
self._parallel_upload_from_stream(ctx, content)
11391139
return UploadStreamResult()
11401140
elif ctx.content_length is not None:
@@ -1206,7 +1206,7 @@ def upload_from(
12061206
use_parallel=use_parallel,
12071207
parallelism=parallelism,
12081208
)
1209-
if ctx.use_parallel:
1209+
if ctx.use_parallel and ctx.content_length >= self._config.files_ext_multipart_upload_min_stream_size:
12101210
self._parallel_upload_from_file(ctx)
12111211
return UploadFileResult()
12121212
else:

tests/test_files.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from enum import Enum
1414
from tempfile import NamedTemporaryFile
1515
from threading import Lock
16-
from typing import Any, Callable, Dict, List, Optional, Type, Union
16+
from typing import Any, Callable, List, Optional, Type, Union, Dict
1717
from urllib.parse import parse_qs, urlparse
1818

1919
import pytest
@@ -24,9 +24,14 @@
2424
from databricks.sdk import WorkspaceClient
2525
from databricks.sdk.core import Config
2626
from databricks.sdk.environments import Cloud, DatabricksEnvironment
27-
from databricks.sdk.errors.platform import (AlreadyExists, BadRequest,
28-
InternalError, NotImplemented,
29-
PermissionDenied, TooManyRequests)
27+
from databricks.sdk.errors.platform import (
28+
AlreadyExists,
29+
BadRequest,
30+
InternalError,
31+
NotImplemented,
32+
PermissionDenied,
33+
TooManyRequests,
34+
)
3035
from databricks.sdk.mixins.files import FallbackToDownloadUsingFilesApi
3136
from databricks.sdk.mixins.files_utils import CreateDownloadUrlResponse
3237
from tests.clock import FakeClock
@@ -1662,6 +1667,7 @@ def processor() -> list:
16621667
request_json = request.json()
16631668
etags = {}
16641669

1670+
assert len(request_json["parts"]) > 0
16651671
for part in request_json["parts"]:
16661672
etags[part["part_number"]] = part["etag"]
16671673

@@ -1738,10 +1744,17 @@ def to_string(test_case: "MultipartUploadTestCase") -> str:
17381744
[
17391745
# -------------------------- happy cases --------------------------
17401746
MultipartUploadTestCase(
1741-
"Multipart upload successful: single part",
1747+
"Multipart upload successful: single part because of small file",
17421748
content_size=1024 * 1024, # less than part size
1743-
multipart_upload_part_size=10 * 1024 * 1024,
1749+
multipart_upload_min_stream_size=10 * 1024 * 1024,
17441750
expected_part_size=1024 * 1024, # chunk size is used
1751+
expected_single_shot_upload=True,
1752+
),
1753+
MultipartUploadTestCase(
1754+
"Multipart upload successful: empty file",
1755+
content_size=0, # empty content
1756+
multipart_upload_min_stream_size=100 * 1024 * 1024, # all files smaller than 100M go to single-shot
1757+
expected_single_shot_upload=True,
17451758
),
17461759
MultipartUploadTestCase(
17471760
"Multipart upload successful: multiple parts (aligned)",

0 commit comments

Comments
 (0)