|
17 | 17 |
|
18 | 18 | import datetime |
19 | 19 | import glob |
| 20 | +# Version detection and compatibility layer for google-cloud-storage v2/v3 |
| 21 | +from importlib.metadata import version as get_version |
20 | 22 | import logging |
21 | 23 | import os |
22 | 24 | import pathlib |
23 | 25 | import tempfile |
24 | 26 | from typing import Optional, TYPE_CHECKING |
| 27 | +import warnings |
25 | 28 |
|
26 | 29 | from google.auth import credentials as auth_credentials |
27 | 30 | from google.cloud import storage |
| 31 | +from packaging.version import Version |
28 | 32 |
|
29 | 33 | from google.cloud.aiplatform import initializer |
30 | 34 | from google.cloud.aiplatform.utils import resource_manager_utils |
|
35 | 39 | _logger = logging.getLogger(__name__) |
36 | 40 |
|
37 | 41 |
|
| 42 | +# Detect google-cloud-storage version once at module load |
| 43 | +try: |
| 44 | + _GCS_VERSION = Version(get_version("google-cloud-storage")) |
| 45 | +except Exception: |
| 46 | + # Fallback if version detection fails (should not happen in normal use) |
| 47 | + _GCS_VERSION = Version("2.0.0") |
| 48 | + |
| 49 | +_USE_FROM_URI = _GCS_VERSION >= Version("3.0.0") |
| 50 | + |
| 51 | +# Warn users on v2 about upcoming deprecation |
| 52 | +if _GCS_VERSION < Version("3.0.0"): |
| 53 | + warnings.warn( |
| 54 | + "Support for google-cloud-storage < 3.0.0 will be removed in a future" |
| 55 | + " version of google-cloud-aiplatform. Please upgrade to" |
| 56 | + " google-cloud-storage >= 3.0.0.", |
| 57 | + FutureWarning, |
| 58 | + stacklevel=2, |
| 59 | + ) |
| 60 | + |
| 61 | + |
| 62 | +def blob_from_uri(uri: str, client: storage.Client) -> storage.Blob: |
| 63 | + """Create a Blob from a GCS URI, compatible with v2 and v3. |
| 64 | +
|
| 65 | + This function provides compatibility across google-cloud-storage versions: |
| 66 | + - v3.x: Uses Blob.from_uri() |
| 67 | + - v2.x: Uses Blob.from_string() (deprecated in v3) |
| 68 | +
|
| 69 | + Args: |
| 70 | + uri: GCS URI (e.g., 'gs://bucket/path/to/blob') |
| 71 | + client: Storage client instance |
| 72 | +
|
| 73 | + Returns: |
| 74 | + storage.Blob: Blob instance |
| 75 | + """ |
| 76 | + if _USE_FROM_URI: |
| 77 | + return storage.Blob.from_uri(uri, client=client) |
| 78 | + else: |
| 79 | + return storage.Blob.from_string(uri, client=client) |
| 80 | + |
| 81 | + |
| 82 | +def bucket_from_uri(uri: str, client: storage.Client) -> storage.Bucket: |
| 83 | + """Create a Bucket from a GCS URI, compatible with v2 and v3. |
| 84 | +
|
| 85 | + This function provides compatibility across google-cloud-storage versions: |
| 86 | + - v3.x: Uses Bucket.from_uri() |
| 87 | + - v2.x: Uses Bucket.from_string() (deprecated in v3) |
| 88 | +
|
| 89 | + Args: |
| 90 | + uri: GCS bucket URI (e.g., 'gs://bucket-name') |
| 91 | + client: Storage client instance |
| 92 | +
|
| 93 | + Returns: |
| 94 | + storage.Bucket: Bucket instance |
| 95 | + """ |
| 96 | + if _USE_FROM_URI: |
| 97 | + return storage.Bucket.from_uri(uri, client=client) |
| 98 | + else: |
| 99 | + return storage.Bucket.from_string(uri, client=client) |
| 100 | + |
| 101 | + |
38 | 102 | def upload_to_gcs( |
39 | 103 | source_path: str, |
40 | 104 | destination_uri: str, |
@@ -79,16 +143,20 @@ def upload_to_gcs( |
79 | 143 | destination_file_uri = ( |
80 | 144 | destination_uri.rstrip("/") + "/" + source_file_relative_posix_path |
81 | 145 | ) |
82 | | - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') |
83 | | - destination_blob = storage.Blob.from_string( |
| 146 | + _logger.debug( |
| 147 | + 'Uploading "%s" to "%s"', source_file_path, destination_file_uri |
| 148 | + ) |
| 149 | + destination_blob = blob_from_uri( |
84 | 150 | destination_file_uri, client=storage_client |
85 | 151 | ) |
86 | 152 | destination_blob.upload_from_filename(filename=source_file_path) |
87 | 153 | else: |
88 | 154 | source_file_path = source_path |
89 | 155 | destination_file_uri = destination_uri |
90 | | - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') |
91 | | - destination_blob = storage.Blob.from_string( |
| 156 | + _logger.debug( |
| 157 | + 'Uploading "%s" to "%s"', source_file_path, destination_file_uri |
| 158 | + ) |
| 159 | + destination_blob = blob_from_uri( |
92 | 160 | destination_file_uri, client=storage_client |
93 | 161 | ) |
94 | 162 | destination_blob.upload_from_filename(filename=source_file_path) |
@@ -234,7 +302,7 @@ def create_gcs_bucket_for_pipeline_artifacts_if_it_does_not_exist( |
234 | 302 | credentials=credentials, |
235 | 303 | ) |
236 | 304 |
|
237 | | - pipelines_bucket = storage.Bucket.from_string( |
| 305 | + pipelines_bucket = bucket_from_uri( |
238 | 306 | uri=output_artifacts_gcs_dir, |
239 | 307 | client=storage_client, |
240 | 308 | ) |
@@ -294,9 +362,11 @@ def download_file_from_gcs( |
294 | 362 | credentials = credentials or initializer.global_config.credentials |
295 | 363 |
|
296 | 364 | storage_client = storage.Client(project=project, credentials=credentials) |
297 | | - source_blob = storage.Blob.from_string(source_file_uri, client=storage_client) |
| 365 | + source_blob = blob_from_uri(source_file_uri, client=storage_client) |
298 | 366 |
|
299 | | - _logger.debug(f'Downloading "{source_file_uri}" to "{destination_file_path}"') |
| 367 | + _logger.debug( |
| 368 | + 'Downloading "%s" to "%s"', source_file_uri, destination_file_path |
| 369 | + ) |
300 | 370 |
|
301 | 371 | source_blob.download_to_filename(filename=destination_file_path) |
302 | 372 |
|
@@ -351,36 +421,33 @@ def download_from_gcs( |
351 | 421 | def _upload_pandas_df_to_gcs( |
352 | 422 | df: "pandas.DataFrame", upload_gcs_path: str, file_format: str = "jsonl" |
353 | 423 | ) -> None: |
354 | | - """Uploads the provided Pandas DataFrame to a GCS bucket. |
| 424 | + """Uploads the provided Pandas DataFrame to a GCS bucket. |
355 | 425 |
|
356 | | - Args: |
357 | | - df (pandas.DataFrame): |
358 | | - Required. The Pandas DataFrame to upload. |
359 | | - upload_gcs_path (str): |
360 | | - Required. The GCS path to upload the data file. |
361 | | - file_format (str): |
362 | | - Required. The format to export the DataFrame to. Currently |
363 | | - only JSONL is supported. |
| 426 | + Args: |
| 427 | + df (pandas.DataFrame): Required. The Pandas DataFrame to upload. |
| 428 | + upload_gcs_path (str): Required. The GCS path to upload the data file. |
| 429 | + file_format (str): Required. The format to export the DataFrame to. |
| 430 | + Currently only JSONL is supported. |
364 | 431 |
|
365 | | - Raises: |
366 | | - ValueError: When a file format other than JSONL is provided. |
367 | | - """ |
| 432 | + Raises: |
| 433 | + ValueError: When a file format other than JSONL is provided. |
| 434 | + """ |
368 | 435 |
|
369 | | - with tempfile.TemporaryDirectory() as temp_dir: |
370 | | - local_dataset_path = os.path.join(temp_dir, "dataset.jsonl") |
| 436 | + with tempfile.TemporaryDirectory() as temp_dir: |
| 437 | + local_dataset_path = os.path.join(temp_dir, "dataset.jsonl") |
371 | 438 |
|
372 | | - if file_format == "jsonl": |
373 | | - df.to_json(path_or_buf=local_dataset_path, orient="records", lines=True) |
374 | | - else: |
375 | | - raise ValueError(f"Unsupported file format: {file_format}") |
| 439 | + if file_format == "jsonl": |
| 440 | + df.to_json(path_or_buf=local_dataset_path, orient="records", lines=True) |
| 441 | + else: |
| 442 | + raise ValueError(f"Unsupported file format: {file_format}") |
376 | 443 |
|
377 | | - storage_client = storage.Client( |
378 | | - project=initializer.global_config.project, |
379 | | - credentials=initializer.global_config.credentials, |
380 | | - ) |
381 | | - storage.Blob.from_string( |
382 | | - uri=upload_gcs_path, client=storage_client |
383 | | - ).upload_from_filename(filename=local_dataset_path) |
| 444 | + storage_client = storage.Client( |
| 445 | + project=initializer.global_config.project, |
| 446 | + credentials=initializer.global_config.credentials, |
| 447 | + ) |
| 448 | + blob_from_uri( |
| 449 | + uri=upload_gcs_path, client=storage_client |
| 450 | + ).upload_from_filename(filename=local_dataset_path) |
384 | 451 |
|
385 | 452 |
|
386 | 453 | def validate_gcs_path(gcs_path: str) -> None: |
|
0 commit comments