Skip to content

Commit 62c5221

Browse files
speedstorm1 authored and copybara-github committed
feat: add support of google-cloud-storage v3 dependency
PiperOrigin-RevId: 825128354
1 parent 9ae5f35 commit 62c5221

25 files changed

+173
-76
lines changed

google/cloud/aiplatform/utils/gcs_utils.py

Lines changed: 99 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,18 @@
1717

1818
import datetime
1919
import glob
20+
# Version detection and compatibility layer for google-cloud-storage v2/v3
21+
from importlib.metadata import version as get_version
2022
import logging
2123
import os
2224
import pathlib
2325
import tempfile
2426
from typing import Optional, TYPE_CHECKING
27+
import warnings
2528

2629
from google.auth import credentials as auth_credentials
2730
from google.cloud import storage
31+
from packaging.version import Version
2832

2933
from google.cloud.aiplatform import initializer
3034
from google.cloud.aiplatform.utils import resource_manager_utils
@@ -35,6 +39,66 @@
3539
_logger = logging.getLogger(__name__)
3640

3741

42+
# Resolve the installed google-cloud-storage release a single time, at import.
try:
    _GCS_VERSION = Version(get_version("google-cloud-storage"))
except Exception:
    # Metadata lookup or version parsing failed (not expected in a normal
    # install); fall back to treating the library as a v2 release.
    _GCS_VERSION = Version("2.0.0")

# True when the v3 ``from_uri`` constructors should be used by the helpers
# below; False selects the legacy ``from_string`` constructors.
_USE_FROM_URI = _GCS_VERSION >= Version("3.0.0")

# Releases older than v3 still work, but tell users that support is going away.
if not _USE_FROM_URI:
    warnings.warn(
        (
            "Support for google-cloud-storage < 3.0.0 will be removed in a future"
            " version of google-cloud-aiplatform. Please upgrade to"
            " google-cloud-storage >= 3.0.0."
        ),
        category=FutureWarning,
        stacklevel=2,
    )
60+
61+
62+
def blob_from_uri(uri: str, client: storage.Client) -> storage.Blob:
    """Build a Blob for a GCS URI on either google-cloud-storage v2 or v3.

    The constructor is chosen from the library version detected at import:
      - v3.x: ``Blob.from_uri()``
      - v2.x: ``Blob.from_string()`` (deprecated in v3)

    Args:
        uri: GCS URI (e.g., 'gs://bucket/path/to/blob')
        client: Storage client instance

    Returns:
        storage.Blob: Blob instance
    """
    # Only the selected constructor is looked up, so the v3-only attribute is
    # never touched when running against a v2 install.
    make_blob = storage.Blob.from_uri if _USE_FROM_URI else storage.Blob.from_string
    return make_blob(uri, client=client)
80+
81+
82+
def bucket_from_uri(uri: str, client: storage.Client) -> storage.Bucket:
    """Build a Bucket for a GCS URI on either google-cloud-storage v2 or v3.

    The constructor is chosen from the library version detected at import:
      - v3.x: ``Bucket.from_uri()``
      - v2.x: ``Bucket.from_string()`` (deprecated in v3)

    Args:
        uri: GCS bucket URI (e.g., 'gs://bucket-name')
        client: Storage client instance

    Returns:
        storage.Bucket: Bucket instance
    """
    # Only the selected constructor is looked up, so the v3-only attribute is
    # never touched when running against a v2 install.
    make_bucket = (
        storage.Bucket.from_uri if _USE_FROM_URI else storage.Bucket.from_string
    )
    return make_bucket(uri, client=client)
100+
101+
38102
def upload_to_gcs(
39103
source_path: str,
40104
destination_uri: str,
@@ -79,16 +143,20 @@ def upload_to_gcs(
79143
destination_file_uri = (
80144
destination_uri.rstrip("/") + "/" + source_file_relative_posix_path
81145
)
82-
_logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"')
83-
destination_blob = storage.Blob.from_string(
146+
_logger.debug(
147+
'Uploading "%s" to "%s"', source_file_path, destination_file_uri
148+
)
149+
destination_blob = blob_from_uri(
84150
destination_file_uri, client=storage_client
85151
)
86152
destination_blob.upload_from_filename(filename=source_file_path)
87153
else:
88154
source_file_path = source_path
89155
destination_file_uri = destination_uri
90-
_logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"')
91-
destination_blob = storage.Blob.from_string(
156+
_logger.debug(
157+
'Uploading "%s" to "%s"', source_file_path, destination_file_uri
158+
)
159+
destination_blob = blob_from_uri(
92160
destination_file_uri, client=storage_client
93161
)
94162
destination_blob.upload_from_filename(filename=source_file_path)
@@ -234,7 +302,7 @@ def create_gcs_bucket_for_pipeline_artifacts_if_it_does_not_exist(
234302
credentials=credentials,
235303
)
236304

237-
pipelines_bucket = storage.Bucket.from_string(
305+
pipelines_bucket = bucket_from_uri(
238306
uri=output_artifacts_gcs_dir,
239307
client=storage_client,
240308
)
@@ -294,9 +362,11 @@ def download_file_from_gcs(
294362
credentials = credentials or initializer.global_config.credentials
295363

296364
storage_client = storage.Client(project=project, credentials=credentials)
297-
source_blob = storage.Blob.from_string(source_file_uri, client=storage_client)
365+
source_blob = blob_from_uri(source_file_uri, client=storage_client)
298366

299-
_logger.debug(f'Downloading "{source_file_uri}" to "{destination_file_path}"')
367+
_logger.debug(
368+
'Downloading "%s" to "%s"', source_file_uri, destination_file_path
369+
)
300370

301371
source_blob.download_to_filename(filename=destination_file_path)
302372

@@ -351,36 +421,33 @@ def download_from_gcs(
351421
def _upload_pandas_df_to_gcs(
352422
df: "pandas.DataFrame", upload_gcs_path: str, file_format: str = "jsonl"
353423
) -> None:
354-
"""Uploads the provided Pandas DataFrame to a GCS bucket.
424+
"""Uploads the provided Pandas DataFrame to a GCS bucket.
355425
356-
Args:
357-
df (pandas.DataFrame):
358-
Required. The Pandas DataFrame to upload.
359-
upload_gcs_path (str):
360-
Required. The GCS path to upload the data file.
361-
file_format (str):
362-
Required. The format to export the DataFrame to. Currently
363-
only JSONL is supported.
426+
Args:
427+
df (pandas.DataFrame): Required. The Pandas DataFrame to upload.
428+
upload_gcs_path (str): Required. The GCS path to upload the data file.
429+
file_format (str): Required. The format to export the DataFrame to.
430+
Currently only JSONL is supported.
364431
365-
Raises:
366-
ValueError: When a file format other than JSONL is provided.
367-
"""
432+
Raises:
433+
ValueError: When a file format other than JSONL is provided.
434+
"""
368435

369-
with tempfile.TemporaryDirectory() as temp_dir:
370-
local_dataset_path = os.path.join(temp_dir, "dataset.jsonl")
436+
with tempfile.TemporaryDirectory() as temp_dir:
437+
local_dataset_path = os.path.join(temp_dir, "dataset.jsonl")
371438

372-
if file_format == "jsonl":
373-
df.to_json(path_or_buf=local_dataset_path, orient="records", lines=True)
374-
else:
375-
raise ValueError(f"Unsupported file format: {file_format}")
439+
if file_format == "jsonl":
440+
df.to_json(path_or_buf=local_dataset_path, orient="records", lines=True)
441+
else:
442+
raise ValueError(f"Unsupported file format: {file_format}")
376443

377-
storage_client = storage.Client(
378-
project=initializer.global_config.project,
379-
credentials=initializer.global_config.credentials,
380-
)
381-
storage.Blob.from_string(
382-
uri=upload_gcs_path, client=storage_client
383-
).upload_from_filename(filename=local_dataset_path)
444+
storage_client = storage.Client(
445+
project=initializer.global_config.project,
446+
credentials=initializer.global_config.credentials,
447+
)
448+
blob_from_uri(
449+
uri=upload_gcs_path, client=storage_client
450+
).upload_from_filename(filename=local_dataset_path)
384451

385452

386453
def validate_gcs_path(gcs_path: str) -> None:

google/cloud/aiplatform/utils/yaml_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from google.auth import transport
2323
from google.cloud import storage
2424
from google.cloud.aiplatform.constants import pipeline as pipeline_constants
25+
from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri
2526

2627
# Pattern for an Artifact Registry URL.
2728
_VALID_AR_URL = pipeline_constants._VALID_AR_URL
@@ -98,7 +99,7 @@ def _load_yaml_from_gs_uri(
9899
"""
99100
yaml = _maybe_import_yaml()
100101
storage_client = storage.Client(project=project, credentials=credentials)
101-
blob = storage.Blob.from_string(uri, storage_client)
102+
blob = blob_from_uri(uri, storage_client)
102103
return yaml.safe_load(blob.download_as_bytes())
103104

104105

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,8 @@
306306
"proto-plus >= 1.22.3, <2.0.0",
307307
"protobuf>=3.20.2,<7.0.0,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5",
308308
"packaging >= 14.3",
309-
"google-cloud-storage >= 1.32.0, < 3.0.0; python_version<'3.13'",
310-
"google-cloud-storage >= 2.10.0, < 3.0.0; python_version>='3.13'",
309+
"google-cloud-storage >= 1.32.0, < 4.0.0; python_version<'3.13'",
310+
"google-cloud-storage >= 2.10.0, < 4.0.0; python_version>='3.13'",
311311
"google-cloud-bigquery >= 1.15.0, < 4.0.0, !=3.20.0",
312312
"google-cloud-resource-manager >= 1.3.3, < 3.0.0",
313313
"shapely < 3.0.0",

testing/constraints-3.10.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support
66
proto-plus==1.22.3
77
protobuf
88
mock==4.0.2
9-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
9+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
1010
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1111
grpcio-testing==1.34.0
1212
mlflow==2.16.0 # Pinned to speed up installation

testing/constraints-3.11.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support
66
proto-plus
77
protobuf
88
mock==4.0.2
9-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
9+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
1010
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1111
pytest-xdist==3.3.1 # Pinned to unbreak unit tests
1212
ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests

testing/constraints-3.12.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ google-api-core==2.21.0 # Tests google-api-core with rest async support
55
google-auth==2.35.0 # Tests google-auth with rest async support
66
proto-plus
77
mock==4.0.2
8-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
8+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
99
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1010
pytest-xdist==3.3.1 # Pinned to unbreak unit tests
1111
ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests

testing/constraints-3.13.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ google-api-core==2.21.0 # Tests google-api-core with rest async support
55
google-auth==2.35.0 # Tests google-auth with rest async support
66
proto-plus
77
mock==4.0.2
8-
google-cloud-storage==2.10.0 # Increased for kfp 2.0 compatibility
8+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
99
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1010
pytest-xdist==3.3.1 # Pinned to unbreak unit tests
1111
ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests

testing/constraints-3.8.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ google-auth==2.14.1 # Tests google-auth without rest async support
77
proto-plus==1.22.3
88
protobuf
99
mock==4.0.2
10-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
10+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
1111
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1212
grpcio-testing==1.34.0
1313
pytest-xdist==3.3.1 # Pinned to unbreak unit tests

testing/constraints-3.9.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support
66
proto-plus==1.22.3
77
protobuf
88
mock==4.0.2
9-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
9+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
1010
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
1111
grpcio-testing==1.34.0
1212
pytest-xdist==3.3.1 # Pinned to unbreak unit tests

testing/constraints-ray-2.33.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ google-api-core
44
proto-plus==1.22.3
55
protobuf
66
mock==4.0.2
7-
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
7+
google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper
88
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
99
grpcio-testing==1.34.0
1010
mlflow==1.30.1 # Pinned to speed up installation

0 commit comments

Comments
 (0)