Skip to content

Commit 00aec9a

Browse files
Fix: FilesExt should fall back to the Files API when a Presigned URL is not available because of an unknown Network Zone
1 parent 49eb17b commit 00aec9a

File tree

3 files changed

+107
-1
lines changed

3 files changed

+107
-1
lines changed

NEXT_CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
### Bug Fixes
88

9+
- Fix `FilesExt` failing to upload and download data when Presigned URLs are not available in certain environments (e.g. Serverless GPU clusters).
10+
911
### Documentation
1012

1113
### Internal Changes

databricks/sdk/mixins/files.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from .._base_client import _BaseClient, _RawResponse, _StreamingResponse
3434
from .._property import _cached_property
3535
from ..config import Config
36-
from ..errors import AlreadyExists, NotFound, PermissionDenied
36+
from ..errors import AlreadyExists, InternalError, NotFound, PermissionDenied
3737
from ..errors.mapper import _error_mapper
3838
from ..retries import retried
3939
from ..service import files
@@ -1650,6 +1650,13 @@ def _do_upload_one_part(
16501650
raise FallbackToUploadUsingFilesApi(None, "Presigned URLs are disabled")
16511651
else:
16521652
raise e from None
1653+
except InternalError as e:
1654+
if self._is_presigned_urls_network_zone_error(e):
1655+
raise FallbackToUploadUsingFilesApi(
1656+
None, "Presigned URLs are not supported in the current network zone"
1657+
)
1658+
else:
1659+
raise e from None
16531660

16541661
upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
16551662
if len(upload_part_urls) == 0:
@@ -1760,6 +1767,13 @@ def _perform_multipart_upload(
17601767
raise FallbackToUploadUsingFilesApi(buffer, "Presigned URLs are disabled")
17611768
else:
17621769
raise e from None
1770+
except InternalError as e:
1771+
if chunk_offset == 0 and self._is_presigned_urls_network_zone_error(e):
1772+
raise FallbackToUploadUsingFilesApi(
1773+
buffer, "Presigned URLs are not supported in the current network zone"
1774+
)
1775+
else:
1776+
raise e from None
17631777

17641778
upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
17651779
if len(upload_part_urls) == 0:
@@ -1917,6 +1931,13 @@ def _is_presigned_urls_disabled_error(self, e: PermissionDenied) -> bool:
19171931
return True
19181932
return False
19191933

1934+
def _is_presigned_urls_network_zone_error(self, e: InternalError) -> bool:
1935+
error_infos = e.get_error_info()
1936+
for error_info in error_infos:
1937+
if error_info.reason == "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN":
1938+
return True
1939+
return False
1940+
19201941
def _perform_resumable_upload(
19211942
self,
19221943
ctx: _UploadContext,
@@ -1966,6 +1987,13 @@ def _perform_resumable_upload(
19661987
raise FallbackToUploadUsingFilesApi(pre_read_buffer, "Presigned URLs are disabled")
19671988
else:
19681989
raise e from None
1990+
except InternalError as e:
1991+
if self._is_presigned_urls_network_zone_error(e):
1992+
raise FallbackToUploadUsingFilesApi(
1993+
pre_read_buffer, "Presigned URLs are not supported in the current network zone"
1994+
)
1995+
else:
1996+
raise e from None
19691997

19701998
resumable_upload_url_node = resumable_upload_url_response.get("resumable_upload_url")
19711999
if not resumable_upload_url_node:
@@ -2350,6 +2378,11 @@ def _create_download_url(self, file_path: str) -> CreateDownloadUrlResponse:
23502378
raise FallbackToDownloadUsingFilesApi(f"Presigned URLs are disabled")
23512379
else:
23522380
raise e from None
2381+
except InternalError as e:
2382+
if self._is_presigned_urls_network_zone_error(e):
2383+
raise FallbackToDownloadUsingFilesApi("Presigned URLs are not supported in the current network zone")
2384+
else:
2385+
raise e from None
23532386

23542387
def _init_download_response_presigned_api(self, file_path: str, added_headers: dict[str, str]) -> DownloadResponse:
23552388
"""

tests/test_files.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,25 @@ class PresignedUrlDownloadTestCase:
796796
}
797797
"""
798798

799+
model_serving_presigned_url_internal_error_response = """
800+
{
801+
"error_code": "INTERNAL_ERROR",
802+
"message": "Can't infer requester network zone.",
803+
"details": [
804+
{
805+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
806+
"reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN",
807+
"domain": "filesystem.databricks.com"
808+
},
809+
{
810+
"@type": "type.googleapis.com/google.rpc.RequestInfo",
811+
"request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924",
812+
"serving_data": ""
813+
}
814+
]
815+
}
816+
"""
817+
799818
expired_url_aws_response = (
800819
'<?xml version="1.0" encoding="utf-8"?><Error><Code>'
801820
"AuthenticationFailed</Code><Message>Server failed to authenticate "
@@ -1098,6 +1117,16 @@ def run(self, config: Config, monkeypatch) -> None:
10981117
code=403, only_invocation=1, body=PresignedUrlDownloadTestCase.presigned_url_disabled_response
10991118
),
11001119
),
1120+
PresignedUrlDownloadTestCase(
1121+
name="Presigned URL is not issued because NetworkZone is not populated, should fallback to Files API",
1122+
file_size=100 * 1024 * 1024,
1123+
expected_download_api="files_api",
1124+
custom_response_create_presigned_url=CustomResponse(
1125+
code=500,
1126+
only_invocation=1,
1127+
body=PresignedUrlDownloadTestCase.model_serving_presigned_url_internal_error_response,
1128+
),
1129+
),
11011130
PresignedUrlDownloadTestCase(
11021131
name="Presigned URL fails with 403 when downloading, should fallback to Files API",
11031132
file_size=100 * 1024 * 1024,
@@ -1496,6 +1525,25 @@ class MultipartUploadTestCase(UploadTestCase):
14961525
}
14971526
"""
14981527

1528+
model_serving_presigned_url_internal_error_response = """
1529+
{
1530+
"error_code": "INTERNAL_ERROR",
1531+
"message": "Can't infer requester network zone.",
1532+
"details": [
1533+
{
1534+
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
1535+
"reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN",
1536+
"domain": "filesystem.databricks.com"
1537+
},
1538+
{
1539+
"@type": "type.googleapis.com/google.rpc.RequestInfo",
1540+
"request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924",
1541+
"serving_data": ""
1542+
}
1543+
]
1544+
}
1545+
"""
1546+
14991547
def __init__(
15001548
self,
15011549
name: str,
@@ -1963,6 +2011,18 @@ def to_string(test_case: "MultipartUploadTestCase") -> str:
19632011
expected_multipart_upload_aborted=True,
19642012
expected_single_shot_upload=True,
19652013
),
2014+
MultipartUploadTestCase(
2015+
"Create upload URL: fallback to single-shot upload when presigned URLs are not issue because of the NetworkZone is not populated to Filesystem service",
2016+
content_size=1024 * 1024,
2017+
custom_response_on_create_multipart_url=CustomResponse(
2018+
code=500,
2019+
body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response,
2020+
# 1 failure is enough
2021+
only_invocation=1,
2022+
),
2023+
expected_multipart_upload_aborted=True,
2024+
expected_single_shot_upload=True,
2025+
),
19662026
# -------------------------- failures on part upload --------------------------
19672027
MultipartUploadTestCase(
19682028
"Upload part: 403 response will trigger fallback to single-shot upload on Azure",
@@ -2502,6 +2562,17 @@ def to_string(test_case: "ResumableUploadTestCase") -> str:
25022562
expected_multipart_upload_aborted=False, # upload didn't start
25032563
expected_single_shot_upload=True,
25042564
),
2565+
ResumableUploadTestCase(
2566+
"Create resumable URL: fallback to single-shot upload when presigned URLs are not issued because of the NetworkZone is not populated to Filesystem service",
2567+
stream_size=1024 * 1024,
2568+
custom_response_on_create_resumable_url=CustomResponse(
2569+
code=500,
2570+
body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response,
2571+
only_invocation=1,
2572+
),
2573+
expected_multipart_upload_aborted=False, # upload didn't start
2574+
expected_single_shot_upload=True,
2575+
),
25052576
ResumableUploadTestCase(
25062577
"Create resumable URL: 500 response is not retried",
25072578
stream_size=1024 * 1024,

0 commit comments

Comments
 (0)