Skip to content

Commit b4ffd99

Browse files
Fix FilesExt to fall back when presigned URLs are not available (#1093)
## What changes are proposed in this pull request? Provide the readers and reviewers with the information they need to understand this PR in a comprehensive manner. Specifically, try to answer the two following questions: - **WHAT** - Fix `FilesExt` upload and download so that they fall back when presigned URLs are not available on certain compute types. - **WHY** - On some newer compute clusters (e.g. model serving clusters), creating a presigned URL through the Files API is not possible yet. In such circumstances the client should fall back to not using presigned URLs; otherwise the customer would not be able to upload or download data using `w.files`. ## How is this tested? It is tested using unit tests.
1 parent 1c012fe commit b4ffd99

File tree

3 files changed

+106
-1
lines changed

3 files changed

+106
-1
lines changed

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### New Features and Improvements
66

77
### Bug Fixes
8+
- Fix `FilesExt` failing to upload and download data when presigned URLs are not available in certain environments (e.g. Serverless GPU clusters).
89

910
### Documentation
1011

databricks/sdk/mixins/files.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from .._base_client import _BaseClient, _RawResponse, _StreamingResponse
3434
from .._property import _cached_property
3535
from ..config import Config
36-
from ..errors import AlreadyExists, NotFound, PermissionDenied
36+
from ..errors import AlreadyExists, InternalError, NotFound, PermissionDenied
3737
from ..errors.mapper import _error_mapper
3838
from ..retries import retried
3939
from ..service import files
@@ -1650,6 +1650,13 @@ def _do_upload_one_part(
16501650
raise FallbackToUploadUsingFilesApi(None, "Presigned URLs are disabled")
16511651
else:
16521652
raise e from None
1653+
except InternalError as e:
1654+
if self._is_presigned_urls_network_zone_error(e):
1655+
raise FallbackToUploadUsingFilesApi(
1656+
None, "Presigned URLs are not supported in the current network zone"
1657+
)
1658+
else:
1659+
raise e from None
16531660

16541661
upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
16551662
if len(upload_part_urls) == 0:
@@ -1760,6 +1767,13 @@ def _perform_multipart_upload(
17601767
raise FallbackToUploadUsingFilesApi(buffer, "Presigned URLs are disabled")
17611768
else:
17621769
raise e from None
1770+
except InternalError as e:
1771+
if chunk_offset == 0 and self._is_presigned_urls_network_zone_error(e):
1772+
raise FallbackToUploadUsingFilesApi(
1773+
buffer, "Presigned URLs are not supported in the current network zone"
1774+
)
1775+
else:
1776+
raise e from None
17631777

17641778
upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
17651779
if len(upload_part_urls) == 0:
@@ -1917,6 +1931,13 @@ def _is_presigned_urls_disabled_error(self, e: PermissionDenied) -> bool:
19171931
return True
19181932
return False
19191933

1934+
def _is_presigned_urls_network_zone_error(self, e: InternalError) -> bool:
    """Tell whether ``e`` carries the "requester network zone unknown" reason.

    Walks the entries returned by ``e.get_error_info()`` and reports True as
    soon as one of them has ``reason`` equal to
    ``FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN``. Returns False when no entry
    matches, including when there are no entries at all.
    """
    return any(
        info.reason == "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN"
        for info in e.get_error_info()
    )
1940+
19201941
def _perform_resumable_upload(
19211942
self,
19221943
ctx: _UploadContext,
@@ -1966,6 +1987,13 @@ def _perform_resumable_upload(
19661987
raise FallbackToUploadUsingFilesApi(pre_read_buffer, "Presigned URLs are disabled")
19671988
else:
19681989
raise e from None
1990+
except InternalError as e:
1991+
if self._is_presigned_urls_network_zone_error(e):
1992+
raise FallbackToUploadUsingFilesApi(
1993+
pre_read_buffer, "Presigned URLs are not supported in the current network zone"
1994+
)
1995+
else:
1996+
raise e from None
19691997

19701998
resumable_upload_url_node = resumable_upload_url_response.get("resumable_upload_url")
19711999
if not resumable_upload_url_node:
@@ -2350,6 +2378,11 @@ def _create_download_url(self, file_path: str) -> CreateDownloadUrlResponse:
23502378
raise FallbackToDownloadUsingFilesApi(f"Presigned URLs are disabled")
23512379
else:
23522380
raise e from None
2381+
except InternalError as e:
2382+
if self._is_presigned_urls_network_zone_error(e):
2383+
raise FallbackToDownloadUsingFilesApi("Presigned URLs are not supported in the current network zone")
2384+
else:
2385+
raise e from None
23532386

23542387
def _init_download_response_presigned_api(self, file_path: str, added_headers: dict[str, str]) -> DownloadResponse:
23552388
"""

tests/test_files.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,25 @@ class PresignedUrlDownloadTestCase:
796796
}
797797
"""
798798

799+
model_serving_presigned_url_internal_error_response = """
800+
{
801+
"error_code": "INTERNAL_ERROR",
802+
"message": "Can't infer requester network zone.",
803+
"details": [
804+
{
805+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
806+
"reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN",
807+
"domain": "filesystem.databricks.com"
808+
},
809+
{
810+
"@type": "type.googleapis.com/google.rpc.RequestInfo",
811+
"request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924",
812+
"serving_data": ""
813+
}
814+
]
815+
}
816+
"""
817+
799818
expired_url_aws_response = (
800819
'<?xml version="1.0" encoding="utf-8"?><Error><Code>'
801820
"AuthenticationFailed</Code><Message>Server failed to authenticate "
@@ -1098,6 +1117,16 @@ def run(self, config: Config, monkeypatch) -> None:
10981117
code=403, only_invocation=1, body=PresignedUrlDownloadTestCase.presigned_url_disabled_response
10991118
),
11001119
),
1120+
PresignedUrlDownloadTestCase(
1121+
name="Presigned URL is not issued because NetworkZone is not populated, should fallback to Files API",
1122+
file_size=100 * 1024 * 1024,
1123+
expected_download_api="files_api",
1124+
custom_response_create_presigned_url=CustomResponse(
1125+
code=500,
1126+
only_invocation=1,
1127+
body=PresignedUrlDownloadTestCase.model_serving_presigned_url_internal_error_response,
1128+
),
1129+
),
11011130
PresignedUrlDownloadTestCase(
11021131
name="Presigned URL fails with 403 when downloading, should fallback to Files API",
11031132
file_size=100 * 1024 * 1024,
@@ -1496,6 +1525,25 @@ class MultipartUploadTestCase(UploadTestCase):
14961525
}
14971526
"""
14981527

1528+
model_serving_presigned_url_internal_error_response = """
1529+
{
1530+
"error_code": "INTERNAL_ERROR",
1531+
"message": "Can't infer requester network zone.",
1532+
"details": [
1533+
{
1534+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
1535+
"reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN",
1536+
"domain": "filesystem.databricks.com"
1537+
},
1538+
{
1539+
"@type": "type.googleapis.com/google.rpc.RequestInfo",
1540+
"request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924",
1541+
"serving_data": ""
1542+
}
1543+
]
1544+
}
1545+
"""
1546+
14991547
def __init__(
15001548
self,
15011549
name: str,
@@ -1963,6 +2011,18 @@ def to_string(test_case: "MultipartUploadTestCase") -> str:
19632011
expected_multipart_upload_aborted=True,
19642012
expected_single_shot_upload=True,
19652013
),
2014+
MultipartUploadTestCase(
2015+
"Create upload URL: fallback to single-shot upload when presigned URLs are not issue because of the NetworkZone is not populated to Filesystem service",
2016+
content_size=1024 * 1024,
2017+
custom_response_on_create_multipart_url=CustomResponse(
2018+
code=500,
2019+
body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response,
2020+
# 1 failure is enough
2021+
only_invocation=1,
2022+
),
2023+
expected_multipart_upload_aborted=True,
2024+
expected_single_shot_upload=True,
2025+
),
19662026
# -------------------------- failures on part upload --------------------------
19672027
MultipartUploadTestCase(
19682028
"Upload part: 403 response will trigger fallback to single-shot upload on Azure",
@@ -2502,6 +2562,17 @@ def to_string(test_case: "ResumableUploadTestCase") -> str:
25022562
expected_multipart_upload_aborted=False, # upload didn't start
25032563
expected_single_shot_upload=True,
25042564
),
2565+
ResumableUploadTestCase(
2566+
"Create resumable URL: fallback to single-shot upload when presigned URLs are not issued because of the NetworkZone is not populated to Filesystem service",
2567+
stream_size=1024 * 1024,
2568+
custom_response_on_create_resumable_url=CustomResponse(
2569+
code=500,
2570+
body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response,
2571+
only_invocation=1,
2572+
),
2573+
expected_multipart_upload_aborted=False, # upload didn't start
2574+
expected_single_shot_upload=True,
2575+
),
25052576
ResumableUploadTestCase(
25062577
"Create resumable URL: 500 response is not retried",
25072578
stream_size=1024 * 1024,

0 commit comments

Comments
 (0)