From 3e419918e73209e4de23138ad5607eb557488d33 Mon Sep 17 00:00:00 2001 From: Yuanjie Ding Date: Tue, 28 Oct 2025 23:15:06 +0100 Subject: [PATCH] Fix FilesExt should fallback when Presigned URL is not available because of unknown Network Zone --- NEXT_CHANGELOG.md | 1 + databricks/sdk/mixins/files.py | 35 ++++++++++++++++- tests/test_files.py | 71 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index f7c830eeb..2e7b46f0a 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features and Improvements ### Bug Fixes +- Fix `FilesExt` failing to upload and download data when Presigned URLs are not available in certain environments (e.g. Serverless GPU clusters). ### Documentation diff --git a/databricks/sdk/mixins/files.py b/databricks/sdk/mixins/files.py index 75e039b87..ca5f56130 100644 --- a/databricks/sdk/mixins/files.py +++ b/databricks/sdk/mixins/files.py @@ -33,7 +33,7 @@ from .._base_client import _BaseClient, _RawResponse, _StreamingResponse from .._property import _cached_property from ..config import Config -from ..errors import AlreadyExists, NotFound, PermissionDenied +from ..errors import AlreadyExists, InternalError, NotFound, PermissionDenied from ..errors.mapper import _error_mapper from ..retries import retried from ..service import files @@ -1650,6 +1650,13 @@ def _do_upload_one_part( raise FallbackToUploadUsingFilesApi(None, "Presigned URLs are disabled") else: raise e from None + except InternalError as e: + if self._is_presigned_urls_network_zone_error(e): + raise FallbackToUploadUsingFilesApi( + None, "Presigned URLs are not supported in the current network zone" + ) + else: + raise e from None upload_part_urls = upload_part_urls_response.get("upload_part_urls", []) if len(upload_part_urls) == 0: @@ -1760,6 +1767,13 @@ def _perform_multipart_upload( raise FallbackToUploadUsingFilesApi(buffer, "Presigned URLs are disabled") else: raise 
e from None + except InternalError as e: + if chunk_offset == 0 and self._is_presigned_urls_network_zone_error(e): + raise FallbackToUploadUsingFilesApi( + buffer, "Presigned URLs are not supported in the current network zone" + ) + else: + raise e from None upload_part_urls = upload_part_urls_response.get("upload_part_urls", []) if len(upload_part_urls) == 0: @@ -1917,6 +1931,13 @@ def _is_presigned_urls_disabled_error(self, e: PermissionDenied) -> bool: return True return False + def _is_presigned_urls_network_zone_error(self, e: InternalError) -> bool: + error_infos = e.get_error_info() + for error_info in error_infos: + if error_info.reason == "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN": + return True + return False + def _perform_resumable_upload( self, ctx: _UploadContext, @@ -1966,6 +1987,13 @@ def _perform_resumable_upload( raise FallbackToUploadUsingFilesApi(pre_read_buffer, "Presigned URLs are disabled") else: raise e from None + except InternalError as e: + if self._is_presigned_urls_network_zone_error(e): + raise FallbackToUploadUsingFilesApi( + pre_read_buffer, "Presigned URLs are not supported in the current network zone" + ) + else: + raise e from None resumable_upload_url_node = resumable_upload_url_response.get("resumable_upload_url") if not resumable_upload_url_node: @@ -2350,6 +2378,11 @@ def _create_download_url(self, file_path: str) -> CreateDownloadUrlResponse: raise FallbackToDownloadUsingFilesApi(f"Presigned URLs are disabled") else: raise e from None + except InternalError as e: + if self._is_presigned_urls_network_zone_error(e): + raise FallbackToDownloadUsingFilesApi("Presigned URLs are not supported in the current network zone") + else: + raise e from None def _init_download_response_presigned_api(self, file_path: str, added_headers: dict[str, str]) -> DownloadResponse: """ diff --git a/tests/test_files.py b/tests/test_files.py index d66b43b5e..2a7450269 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -796,6 +796,25 @@ class 
PresignedUrlDownloadTestCase: } """ + model_serving_presigned_url_internal_error_response = """ + { + "error_code": "INTERNAL_ERROR", + "message": "Can't infer requester network zone.", + "details": [ + { + "@type": "type.googleapis.com/google.rpc.ErrorInfo", + "reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN", + "domain": "filesystem.databricks.com" + }, + { + "@type": "type.googleapis.com/google.rpc.RequestInfo", + "request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924", + "serving_data": "" + } + ] + } + """ + expired_url_aws_response = ( '' "AuthenticationFailedServer failed to authenticate " @@ -1098,6 +1117,16 @@ def run(self, config: Config, monkeypatch) -> None: code=403, only_invocation=1, body=PresignedUrlDownloadTestCase.presigned_url_disabled_response ), ), + PresignedUrlDownloadTestCase( + name="Presigned URL is not issued because NetworkZone is not populated, should fallback to Files API", + file_size=100 * 1024 * 1024, + expected_download_api="files_api", + custom_response_create_presigned_url=CustomResponse( + code=500, + only_invocation=1, + body=PresignedUrlDownloadTestCase.model_serving_presigned_url_internal_error_response, + ), + ), PresignedUrlDownloadTestCase( name="Presigned URL fails with 403 when downloading, should fallback to Files API", file_size=100 * 1024 * 1024, @@ -1496,6 +1525,25 @@ class MultipartUploadTestCase(UploadTestCase): } """ + model_serving_presigned_url_internal_error_response = """ + { + "error_code": "INTERNAL_ERROR", + "message": "Can't infer requester network zone.", + "details": [ + { + "@type": "type.googleapis.com/google.rpc.ErrorInfo", + "reason": "FILES_API_REQUESTER_NETWORK_ZONE_UNKNOWN", + "domain": "filesystem.databricks.com" + }, + { + "@type": "type.googleapis.com/google.rpc.RequestInfo", + "request_id": "b2ffb201-ff61-41ad-93e3-50d47654e924", + "serving_data": "" + } + ] + } + """ + def __init__( self, name: str, @@ -1963,6 +2011,18 @@ def to_string(test_case: "MultipartUploadTestCase") -> str: 
expected_multipart_upload_aborted=True, expected_single_shot_upload=True, ), + MultipartUploadTestCase( + "Create upload URL: fallback to single-shot upload when presigned URLs are not issued because of the NetworkZone is not populated to Filesystem service", + content_size=1024 * 1024, + custom_response_on_create_multipart_url=CustomResponse( + code=500, + body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response, + # 1 failure is enough + only_invocation=1, + ), + expected_multipart_upload_aborted=True, + expected_single_shot_upload=True, + ), # -------------------------- failures on part upload -------------------------- MultipartUploadTestCase( "Upload part: 403 response will trigger fallback to single-shot upload on Azure", @@ -2502,6 +2562,17 @@ def to_string(test_case: "ResumableUploadTestCase") -> str: expected_multipart_upload_aborted=False, # upload didn't start expected_single_shot_upload=True, ), + ResumableUploadTestCase( + "Create resumable URL: fallback to single-shot upload when presigned URLs are not issued because of the NetworkZone is not populated to Filesystem service", + stream_size=1024 * 1024, + custom_response_on_create_resumable_url=CustomResponse( + code=500, + body=MultipartUploadTestCase.model_serving_presigned_url_internal_error_response, + only_invocation=1, + ), + expected_multipart_upload_aborted=False, # upload didn't start + expected_single_shot_upload=True, + ), ResumableUploadTestCase( "Create resumable URL: 500 response is not retried", stream_size=1024 * 1024,