Skip to content

Commit 8c744de

Browse files
SNOW-2032699: Use GCS virtual url based on the stage response (#2274)
1 parent c384ce1 commit 8c744de

File tree

7 files changed

+39
-58
lines changed

7 files changed

+39
-58
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1616
- Added experimental support for OAuth authorization code and client credentials flows.
1717
- Improved error message for client-side query cancellations due to timeouts.
1818
- Added support for GCS regional endpoints.
19-
- Added `gcs_use_virtual_endpoints` connection property that forces the usage of the virtual GCS usage. Thanks to this it should be possible to set up private DNS entry for the GCS endpoint. See more: https://cloud.google.com/storage/docs/request-endpoints#xml-api
2019
- Fixed a bug that caused the driver to fail silently on `TO_DATE` Arrow-to-Python conversion when an invalid date was followed by a valid one.
2120
- Added `check_arrow_conversion_error_on_every_column` connection property that can be set to `False` to restore the previous behaviour, in which the driver ignores conversion errors unless they occur in the last column. This flag's purpose is to unblock workflows that may be impacted by the bugfix; it will be removed in a later release.
2221
- Lower log levels from info to debug for some of the messages to make the output easier to follow.
2322
- Allow the connector to inherit a UUID4 generated upstream, provided in statement parameters (field: `requestId`), rather than automatically generate a UUID4 to use for the HTTP Request ID.
2423
- Improved logging in urllib3, boto3, botocore - ensured data masking even after a future migration to an externally owned library.
2524
- Fix expired S3 credentials update and increment retry when expired credentials are found.
2625
- Added `client_fetch_threads` experimental parameter to better utilize threads for fetching query results.
26+
- Added support for GCS virtual URLs. See more: https://cloud.google.com/storage/docs/request-endpoints#xml-api
2727

2828
- v3.14.0(March 03, 2025)
2929
- Bumped pyOpenSSL dependency upper boundary from <25.0.0 to <26.0.0.

src/snowflake/connector/connection.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -320,10 +320,6 @@ def _get_private_bytes_from_file(
320320
None,
321321
(type(None), int),
322322
), # SNOW-1817982: limit iobound TPE sizes when executing PUT/GET
323-
"gcs_use_virtual_endpoints": (
324-
False,
325-
bool,
326-
), # use https://{bucket}.storage.googleapis.com instead of https://storage.googleapis.com/{bucket}
327323
"oauth_client_id": (
328324
None,
329325
(type(None), str),
@@ -439,7 +435,6 @@ class SnowflakeConnection:
439435
before the connector shuts down. Default value is false.
440436
token_file_path: The file path of the token file. If both token and token_file_path are provided, the token in token_file_path will be used.
441437
unsafe_file_write: When true, files downloaded by GET will be saved with 644 permissions. Otherwise, files will be saved with safe - owner-only permissions: 600.
442-
gcs_use_virtual_endpoints: When true, the virtual endpoint url is used, see: https://cloud.google.com/storage/docs/request-endpoints#xml-api
443438
check_arrow_conversion_error_on_every_column: When true, the error check after the conversion from arrow to python types will happen for every column in the row. This is a new behaviour which fixes the bug that caused the type errors to trigger silently when occurring at any place other than the last column in a row. To revert to the previous (faulty) behaviour, please set this flag to false.
444439
"""
445440

@@ -858,14 +853,6 @@ def oauth_security_features(self) -> _OAuthSecurityFeatures:
858853
refresh_token_enabled="refresh_token" in features,
859854
)
860855

861-
@property
862-
def gcs_use_virtual_endpoints(self) -> bool:
863-
return self._gcs_use_virtual_endpoints
864-
865-
@gcs_use_virtual_endpoints.setter
866-
def gcs_use_virtual_endpoints(self, value: bool) -> None:
867-
self._gcs_use_virtual_endpoints = value
868-
869856
@property
870857
def check_arrow_conversion_error_on_every_column(self) -> bool:
871858
return self._check_arrow_conversion_error_on_every_column

src/snowflake/connector/cursor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,6 @@ def execute(
10831083
use_s3_regional_url=self._connection.enable_stage_s3_privatelink_for_us_east_1,
10841084
iobound_tpe_limit=self._connection.iobound_tpe_limit,
10851085
unsafe_file_write=self._connection.unsafe_file_write,
1086-
gcs_use_virtual_endpoints=self._connection.gcs_use_virtual_endpoints,
10871086
)
10881087
sf_file_transfer_agent.execute()
10891088
data = sf_file_transfer_agent.result()

src/snowflake/connector/file_transfer_agent.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,6 @@ def __init__(
355355
use_s3_regional_url: bool = False,
356356
iobound_tpe_limit: int | None = None,
357357
unsafe_file_write: bool = False,
358-
gcs_use_virtual_endpoints: bool = False,
359358
) -> None:
360359
self._cursor = cursor
361360
self._command = command
@@ -388,7 +387,6 @@ def __init__(
388387
self._credentials: StorageCredential | None = None
389388
self._iobound_tpe_limit = iobound_tpe_limit
390389
self._unsafe_file_write = unsafe_file_write
391-
self._gcs_use_virtual_endpoints = gcs_use_virtual_endpoints
392390

393391
def execute(self) -> None:
394392
self._parse_command()
@@ -704,7 +702,6 @@ def _create_file_transfer_client(
704702
self._cursor._connection,
705703
self._command,
706704
unsafe_file_write=self._unsafe_file_write,
707-
use_virtual_endpoints=self._gcs_use_virtual_endpoints,
708705
)
709706
raise Exception(f"{self._stage_location_type} is an unknown stage type")
710707

src/snowflake/connector/gcs_storage_client.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ def __init__(
5252
cnx: SnowflakeConnection,
5353
command: str,
5454
unsafe_file_write: bool = False,
55-
use_virtual_endpoints: bool = False,
5655
) -> None:
5756
"""Creates a client object with given stage credentials.
5857
@@ -86,7 +85,9 @@ def __init__(
8685
self.endpoint: str | None = (
8786
None if "endPoint" not in stage_info else stage_info["endPoint"]
8887
)
89-
self.use_virtual_endpoints: bool = use_virtual_endpoints
88+
self.use_virtual_url: bool = (
89+
"useVirtualUrl" in stage_info and stage_info["useVirtualUrl"]
90+
)
9091

9192
if self.security_token:
9293
logger.debug(f"len(GCS_ACCESS_TOKEN): {len(self.security_token)}")
@@ -169,7 +170,7 @@ def generate_url_and_rest_args() -> (
169170
else self.stage_info["region"]
170171
),
171172
self.endpoint,
172-
self.use_virtual_endpoints,
173+
self.use_virtual_url,
173174
)
174175
access_token = self.security_token
175176
else:
@@ -208,7 +209,7 @@ def generate_url_and_rest_args() -> (
208209
else self.stage_info["region"]
209210
),
210211
self.endpoint,
211-
self.use_virtual_endpoints,
212+
self.use_virtual_url,
212213
)
213214
access_token = self.security_token
214215
gcs_headers["Authorization"] = f"Bearer {access_token}"
@@ -374,7 +375,7 @@ def generate_url_and_authenticated_headers():
374375
else self.stage_info["region"]
375376
),
376377
self.endpoint,
377-
self.use_virtual_endpoints,
378+
self.use_virtual_url,
378379
)
379380
gcs_headers = {"Authorization": f"Bearer {self.security_token}"}
380381
rest_args = {"headers": gcs_headers}
@@ -423,7 +424,7 @@ def get_location(
423424
use_regional_url: str = False,
424425
region: str = None,
425426
endpoint: str = None,
426-
use_virtual_endpoints: bool = False,
427+
use_virtual_url: bool = False,
427428
) -> GcsLocation:
428429
container_name = stage_location
429430
path = ""
@@ -438,7 +439,7 @@ def get_location(
438439
if endpoint.endswith("/"):
439440
endpoint = endpoint[:-1]
440441
return GcsLocation(bucket_name=container_name, path=path, endpoint=endpoint)
441-
elif use_virtual_endpoints:
442+
elif use_virtual_url:
442443
return GcsLocation(
443444
bucket_name=container_name,
444445
path=path,
@@ -460,14 +461,14 @@ def generate_file_url(
460461
use_regional_url: str = False,
461462
region: str = None,
462463
endpoint: str = None,
463-
use_virtual_endpoints: bool = False,
464+
use_virtual_url: bool = False,
464465
) -> str:
465466
gcs_location = SnowflakeGCSRestClient.get_location(
466467
stage_location, use_regional_url, region, endpoint
467468
)
468469
full_file_path = f"{gcs_location.path}{filename}"
469470

470-
if use_virtual_endpoints:
471+
if use_virtual_url:
471472
return f"{gcs_location.endpoint}/{quote(full_file_path)}"
472473
else:
473474
return f"{gcs_location.endpoint}/{gcs_location.bucket_name}/{quote(full_file_path)}"

test/integ/test_connection.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,34 +1375,6 @@ def test_server_session_keep_alive(conn_cnx):
13751375
mock_delete_session.assert_called_once()
13761376

13771377

1378-
@pytest.mark.skipolddriver
1379-
@pytest.mark.parametrize(
1380-
"value",
1381-
[
1382-
True,
1383-
False,
1384-
],
1385-
)
1386-
def test_gcs_use_virtual_endpoints(conn_cnx, value):
1387-
with mock.patch(
1388-
"snowflake.connector.network.SnowflakeRestful.fetch",
1389-
return_value={"data": {"token": None, "masterToken": None}, "success": True},
1390-
):
1391-
with snowflake.connector.connect(
1392-
user="test-user",
1393-
password="test-password",
1394-
host="test-host",
1395-
port="443",
1396-
account="test-account",
1397-
gcs_use_virtual_endpoints=value,
1398-
) as cnx:
1399-
assert cnx
1400-
cnx.commit = cnx.rollback = (
1401-
lambda: None
1402-
) # Skip tear down, there's only a mocked rest api
1403-
assert cnx.gcs_use_virtual_endpoints == value
1404-
1405-
14061378
@pytest.mark.skipolddriver
14071379
def test_ocsp_mode_disable_ocsp_checks(conn_cnx, is_public_test, caplog):
14081380
caplog.set_level(logging.DEBUG, "snowflake.connector.ocsp_snowflake")

test/unit/test_gcs_client.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ def test_get_file_header_none_with_presigned_url(tmp_path):
350350

351351

352352
@pytest.mark.parametrize(
353-
"region,return_url,use_regional_url,endpoint,gcs_use_virtual_endpoints",
353+
"region,return_url,use_regional_url,endpoint,use_virtual_url",
354354
[
355355
(
356356
"US-CENTRAL1",
@@ -407,13 +407,13 @@ def test_get_file_header_none_with_presigned_url(tmp_path):
407407
),
408408
],
409409
)
410-
def test_url(region, return_url, use_regional_url, endpoint, gcs_use_virtual_endpoints):
410+
def test_url(region, return_url, use_regional_url, endpoint, use_virtual_url):
411411
gcs_location = SnowflakeGCSRestClient.get_location(
412412
stage_location="location",
413413
use_regional_url=use_regional_url,
414414
region=region,
415415
endpoint=endpoint,
416-
use_virtual_endpoints=gcs_use_virtual_endpoints,
416+
use_virtual_url=use_virtual_url,
417417
)
418418
assert gcs_location.endpoint == return_url
419419

@@ -446,3 +446,28 @@ def test_use_regional_url(region, use_regional_url, return_value):
446446
)
447447

448448
assert client.use_regional_url == return_value
449+
450+
451+
@pytest.mark.parametrize(
452+
"use_virtual_url,return_value",
453+
[(False, False), (True, True), (None, False)],
454+
)
455+
def test_stage_info_use_virtual_url(use_virtual_url, return_value):
456+
meta = SnowflakeFileMeta(
457+
name="path/some_file",
458+
src_file_name="path/some_file",
459+
stage_location_type="GCS",
460+
presigned_url="www.example.com",
461+
)
462+
storage_credentials = Mock()
463+
storage_credentials.creds = {}
464+
stage_info: dict[str, any] = dict()
465+
if use_virtual_url is not None:
466+
stage_info["useVirtualUrl"] = use_virtual_url
467+
connection = Mock()
468+
469+
client = SnowflakeGCSRestClient(
470+
meta, storage_credentials, stage_info, connection, ""
471+
)
472+
473+
assert client.use_virtual_url == return_value

0 commit comments

Comments
 (0)