Skip to content

Commit 0c7ae3f

Browse files
committed
Merge remote-tracking branch 'origin/main' into zb-write-copy
2 parents 9fdcbe0 + 5112276 commit 0c7ae3f

File tree

8 files changed

+302
-135
lines changed

8 files changed

+302
-135
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ jobs:
4242
pytest -vv -s \
4343
--log-format="%(asctime)s %(levelname)s %(message)s" \
4444
--log-date-format="%H:%M:%S" \
45-
gcsfs/ \
46-
--ignore=gcsfs/tests/test_extended_gcsfs.py \
47-
--ignore=gcsfs/tests/test_zonal_file.py
48-
45+
gcsfs/
4946
- name: Run Extended Tests
5047
run: |
5148
pip install "git+https://github.com/googleapis/python-storage@main#egg=google-cloud-storage" --force-reinstall --no-cache-dir --upgrade

docs/source/developer.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ real GCS. A small number of tests run differently or are skipped.
1818

1919
If you want to actually test against real GCS, then you should set
2020
STORAGE_EMULATOR_HOST to "https://storage.googleapis.com" and also
21-
provide appropriate GCSFS_TEST_BUCKET and GCSFS_TEST_PROJECT, as well
22-
as setting your default google credentials (or providing them via the
23-
fsspec config).
21+
provide appropriate GCSFS_TEST_BUCKET, GCSFS_TEST_VERSIONED_BUCKET
22+
(used for tests that target GCS object versioning; this bucket must have versioning enabled),
23+
GCSFS_ZONAL_TEST_BUCKET (used for testing Rapid storage features) and GCSFS_TEST_PROJECT,
24+
as well as setting your default google credentials (or providing them via the fsspec config).
2425

2526
.. _fake-gcs-server: https://github.com/fsouza/fake-gcs-server
2627

gcsfs/tests/conftest.py

Lines changed: 138 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
import fsspec
88
import pytest
99
import requests
10+
from google.cloud import storage
1011

1112
from gcsfs import GCSFileSystem
12-
from gcsfs.tests.settings import TEST_BUCKET
13+
from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET, TEST_ZONAL_BUCKET
1314

1415
files = {
1516
"test/accounts.1.json": (
@@ -57,7 +58,7 @@ def stop_docker(container):
5758
subprocess.call(["docker", "rm", "-f", "-v", cid])
5859

5960

60-
@pytest.fixture(scope="module")
61+
@pytest.fixture(scope="session")
6162
def docker_gcs():
6263
if "STORAGE_EMULATOR_HOST" in os.environ:
6364
# assume using real API or otherwise have a server already set up
@@ -88,7 +89,7 @@ def docker_gcs():
8889
stop_docker(container)
8990

9091

91-
@pytest.fixture
92+
@pytest.fixture(scope="session")
9293
def gcs_factory(docker_gcs):
9394
params["endpoint_url"] = docker_gcs
9495

@@ -99,44 +100,83 @@ def factory(**kwargs):
99100
return factory
100101

101102

103+
@pytest.fixture(scope="session")
104+
def buckets_to_delete():
105+
"""A set to keep track of buckets created during the test session."""
106+
return set()
107+
108+
102109
@pytest.fixture
103-
def gcs(gcs_factory, populate=True):
110+
def gcs(gcs_factory, buckets_to_delete, populate=True):
104111
gcs = gcs_factory()
105-
try:
106-
# ensure we're empty.
107-
try:
108-
gcs.rm(TEST_BUCKET, recursive=True)
109-
except FileNotFoundError:
110-
pass
111-
try:
112+
try: # ensure we're empty.
113+
# Create the bucket if it doesn't exist, otherwise clean it.
114+
if not gcs.exists(TEST_BUCKET):
112115
gcs.mkdir(TEST_BUCKET)
113-
except Exception:
114-
pass
116+
buckets_to_delete.add(TEST_BUCKET)
117+
else:
118+
try:
119+
gcs.rm(gcs.find(TEST_BUCKET))
120+
except Exception as e:
121+
logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
115122

116123
if populate:
117124
gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
118125
gcs.invalidate_cache()
119126
yield gcs
120127
finally:
121-
try:
122-
gcs.rm(gcs.find(TEST_BUCKET))
123-
gcs.rm(TEST_BUCKET)
124-
except: # noqa: E722
125-
pass
128+
_cleanup_gcs(gcs)
126129

127130

128-
def _cleanup_gcs(gcs, is_real_gcs):
129-
"""Only remove the bucket/contents if we are NOT using the real GCS, logging a warning on failure."""
130-
if is_real_gcs:
131-
return
131+
def _cleanup_gcs(gcs):
132+
"""Clean the bucket contents, logging a warning on failure."""
132133
try:
133-
gcs.rm(TEST_BUCKET, recursive=True)
134+
gcs.rm(gcs.find(TEST_BUCKET))
134135
except Exception as e:
135136
logging.warning(f"Failed to clean up GCS bucket {TEST_BUCKET}: {e}")
136137

137138

139+
@pytest.fixture(scope="session", autouse=True)
140+
def final_cleanup(gcs_factory, buckets_to_delete):
141+
"""A session-scoped fixture to delete the test buckets after all tests are run."""
142+
yield
143+
# This code runs after the entire test session finishes
144+
use_extended_gcs = os.getenv(
145+
"GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false"
146+
).lower() in (
147+
"true",
148+
"1",
149+
)
150+
151+
if use_extended_gcs:
152+
is_real_gcs = (
153+
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
154+
)
155+
mock_authentication_manager = (
156+
patch("google.auth.default", return_value=(None, "fake-project"))
157+
if not is_real_gcs
158+
else nullcontext()
159+
)
160+
else:
161+
mock_authentication_manager = nullcontext()
162+
163+
with mock_authentication_manager:
164+
gcs = gcs_factory()
165+
for bucket in buckets_to_delete:
166+
# For real GCS, only delete if created by the test suite.
167+
# For emulators, always delete.
168+
try:
169+
if gcs.exists(bucket):
170+
gcs.rm(bucket, recursive=True)
171+
logging.info(f"Cleaned up bucket: {bucket}")
172+
except Exception as e:
173+
logging.warning(
174+
f"Failed to perform final cleanup for bucket {bucket}: {e}"
175+
)
176+
177+
138178
@pytest.fixture
139-
def extended_gcsfs(gcs_factory, populate=True):
179+
def extended_gcsfs(gcs_factory, buckets_to_delete, populate=True):
140180
# Check if we are running against a real GCS endpoint
141181
is_real_gcs = (
142182
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
@@ -147,39 +187,96 @@ def extended_gcsfs(gcs_factory, populate=True):
147187
# Only create/delete/populate the bucket if we are NOT using the real GCS endpoint
148188
if not is_real_gcs:
149189
try:
150-
extended_gcsfs.rm(TEST_BUCKET, recursive=True)
190+
extended_gcsfs.rm(TEST_ZONAL_BUCKET, recursive=True)
151191
except FileNotFoundError:
152192
pass
153-
extended_gcsfs.mkdir(TEST_BUCKET)
193+
extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
194+
buckets_to_delete.add(TEST_ZONAL_BUCKET)
154195
if populate:
155196
extended_gcsfs.pipe(
156-
{TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}
197+
{TEST_ZONAL_BUCKET + "/" + k: v for k, v in allfiles.items()}
157198
)
158199
extended_gcsfs.invalidate_cache()
159200
yield extended_gcsfs
160201
finally:
161-
_cleanup_gcs(extended_gcsfs, is_real_gcs)
202+
_cleanup_gcs(extended_gcsfs)
162203

163204

164205
@pytest.fixture
165-
def gcs_versioned(gcs_factory):
206+
def gcs_versioned(gcs_factory, buckets_to_delete):
166207
gcs = gcs_factory()
167208
gcs.version_aware = True
168-
try:
209+
is_real_gcs = (
210+
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
211+
)
212+
try: # ensure we're empty.
213+
# The versioned bucket might be created by `is_versioning_enabled`
214+
# in test_core_versioned.py. We must register it for cleanup only if
215+
# it was created by this test run.
169216
try:
170-
gcs.rm(gcs.find(TEST_BUCKET, versions=True))
171-
except FileNotFoundError:
172-
pass
217+
from gcsfs.tests.test_core_versioned import (
218+
_VERSIONED_BUCKET_CREATED_BY_TESTS,
219+
)
173220

174-
try:
175-
gcs.mkdir(TEST_BUCKET, enable_versioning=True)
176-
except Exception:
177-
pass
221+
if _VERSIONED_BUCKET_CREATED_BY_TESTS:
222+
buckets_to_delete.add(TEST_VERSIONED_BUCKET)
223+
except ImportError:
224+
pass # test_core_versioned is not being run
225+
if is_real_gcs:
226+
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
227+
else:
228+
# For emulators, we delete and recreate the bucket for a clean state
229+
try:
230+
gcs.rm(TEST_VERSIONED_BUCKET, recursive=True)
231+
except FileNotFoundError:
232+
pass
233+
gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
234+
buckets_to_delete.add(TEST_VERSIONED_BUCKET)
178235
gcs.invalidate_cache()
179236
yield gcs
180237
finally:
238+
# Ensure the bucket is empty after the test.
181239
try:
182-
gcs.rm(gcs.find(TEST_BUCKET, versions=True))
183-
gcs.rm(TEST_BUCKET)
184-
except: # noqa: E722
185-
pass
240+
if is_real_gcs:
241+
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
242+
except Exception as e:
243+
logging.warning(
244+
f"Failed to clean up versioned bucket {TEST_VERSIONED_BUCKET} after test: {e}"
245+
)
246+
247+
248+
def cleanup_versioned_bucket(gcs, bucket_name, prefix=None):
249+
"""
250+
Deletes all object versions in a bucket using the google-cloud-storage client,
251+
ensuring it uses the same credentials as the gcsfs instance.
252+
"""
253+
# Define a retry policy for API calls to handle rate limiting.
254+
# This can retry on 429 Too Many Requests errors, which can happen
255+
# when deleting many object versions quickly.
256+
from google.api_core.retry import Retry
257+
258+
retry_policy = Retry(
259+
initial=1.0, # Initial delay in seconds
260+
maximum=30.0, # Maximum delay in seconds
261+
multiplier=1.2, # Backoff factor
262+
)
263+
264+
client = storage.Client(
265+
credentials=gcs.credentials.credentials, project=gcs.project
266+
)
267+
268+
# List all blobs, including old versions
269+
blobs_to_delete = list(client.list_blobs(bucket_name, versions=True, prefix=prefix))
270+
271+
if not blobs_to_delete:
272+
logging.info("No object versions to delete in %s.", bucket_name)
273+
return
274+
275+
logging.info(
276+
"Deleting %d object versions from %s.", len(blobs_to_delete), bucket_name
277+
)
278+
time.sleep(2)
279+
for blob in blobs_to_delete:
280+
blob.delete(retry=retry_policy)
281+
282+
logging.info("Successfully deleted %d object versions.", len(blobs_to_delete))

gcsfs/tests/derived/gcsfs_fixtures.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,35 @@
1+
import logging
2+
13
import fsspec
24
import pytest
35
from fsspec.tests.abstract import AbstractFixtures
46

57
from gcsfs.core import GCSFileSystem
6-
from gcsfs.tests.conftest import allfiles
8+
from gcsfs.tests.conftest import _cleanup_gcs, allfiles
79
from gcsfs.tests.settings import TEST_BUCKET
810

911

1012
class GcsfsFixtures(AbstractFixtures):
1113
@pytest.fixture(scope="class")
12-
def fs(self, docker_gcs):
14+
def fs(self, docker_gcs, buckets_to_delete):
1315
GCSFileSystem.clear_instance_cache()
1416
gcs = fsspec.filesystem("gcs", endpoint_url=docker_gcs)
15-
try:
16-
# ensure we're empty.
17-
try:
18-
gcs.rm(TEST_BUCKET, recursive=True)
19-
except FileNotFoundError:
20-
pass
21-
try:
17+
try: # ensure we're empty.
18+
# Create the bucket if it doesn't exist, otherwise clean it.
19+
if not gcs.exists(TEST_BUCKET):
20+
buckets_to_delete.add(TEST_BUCKET)
2221
gcs.mkdir(TEST_BUCKET)
23-
except Exception:
24-
pass
22+
else:
23+
try:
24+
gcs.rm(gcs.find(TEST_BUCKET))
25+
except Exception as e:
26+
logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
2527

2628
gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
2729
gcs.invalidate_cache()
2830
yield gcs
2931
finally:
30-
try:
31-
gcs.rm(gcs.find(TEST_BUCKET))
32-
gcs.rm(TEST_BUCKET)
33-
except: # noqa: E722
34-
pass
32+
_cleanup_gcs(gcs)
3533

3634
@pytest.fixture
3735
def fs_path(self):

gcsfs/tests/settings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import os
22

33
TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test")
4+
TEST_VERSIONED_BUCKET = os.getenv("GCSFS_TEST_VERSIONED_BUCKET", "gcsfs_test_versioned")
5+
TEST_ZONAL_BUCKET = os.getenv("GCSFS_ZONAL_TEST_BUCKET", "gcsfs_zonal_test")
46
TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project")
5-
TEST_REQUESTER_PAYS_BUCKET = "gcsfs_test_req_pay"
7+
TEST_REQUESTER_PAYS_BUCKET = f"{TEST_BUCKET}_req_pay"
68
TEST_KMS_KEY = os.getenv(
79
"GCSFS_TEST_KMS_KEY",
810
f"projects/{TEST_PROJECT}/locations/us/keyRings/gcsfs_test/cryptKeys/gcsfs_test_key",

0 commit comments

Comments
 (0)