Skip to content

Commit 5112276

Browse files
Update cleanup logic in tests to empty the bucket instead of deleting the bucket
1 parent 81fb615 commit 5112276

File tree

6 files changed

+134
-68
lines changed

6 files changed

+134
-68
lines changed

docs/source/developer.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ real GCS. A small number of tests run differently or are skipped.
1919
If you want to actually test against real GCS, then you should set
2020
STORAGE_EMULATOR_HOST to "https://storage.googleapis.com" and also
2121
provide appropriate GCSFS_TEST_BUCKET, GCSFS_TEST_VERSIONED_BUCKET
22-
(To use for tests that target GCS object versioning, this bucket must have object versioning enabled)
23-
and GCSFS_TEST_PROJECT, as well as setting your default google
24-
credentials (or providing them via the fsspec config).
22+
(used for tests that target GCS object versioning; this bucket must have versioning enabled),
23+
GCSFS_ZONAL_TEST_BUCKET (used for testing Rapid storage features) and GCSFS_TEST_PROJECT,
24+
as well as setting your default google credentials (or providing them via the fsspec config).
2525

2626
.. _fake-gcs-server: https://github.com/fsouza/fake-gcs-server
2727

gcsfs/tests/conftest.py

Lines changed: 91 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from google.cloud import storage
1313

1414
from gcsfs import GCSFileSystem
15-
from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET
15+
from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET, TEST_ZONAL_BUCKET
1616

1717
files = {
1818
"test/accounts.1.json": (
@@ -60,7 +60,7 @@ def stop_docker(container):
6060
subprocess.call(["docker", "rm", "-f", "-v", cid])
6161

6262

63-
@pytest.fixture(scope="module")
63+
@pytest.fixture(scope="session")
6464
def docker_gcs():
6565
if "STORAGE_EMULATOR_HOST" in os.environ:
6666
# assume using real API or otherwise have a server already set up
@@ -91,7 +91,7 @@ def docker_gcs():
9191
stop_docker(container)
9292

9393

94-
@pytest.fixture
94+
@pytest.fixture(scope="session")
9595
def gcs_factory(docker_gcs):
9696
params["endpoint_url"] = docker_gcs
9797

@@ -102,44 +102,83 @@ def factory(**kwargs):
102102
return factory
103103

104104

105+
@pytest.fixture(scope="session")
106+
def buckets_to_delete():
107+
"""A set to keep track of buckets created during the test session."""
108+
return set()
109+
110+
105111
@pytest.fixture
106-
def gcs(gcs_factory, populate=True):
112+
def gcs(gcs_factory, buckets_to_delete, populate=True):
107113
gcs = gcs_factory()
108-
try:
109-
# ensure we're empty.
110-
try:
111-
gcs.rm(TEST_BUCKET, recursive=True)
112-
except FileNotFoundError:
113-
pass
114-
try:
114+
try: # ensure we're empty.
115+
# Create the bucket if it doesn't exist, otherwise clean it.
116+
if not gcs.exists(TEST_BUCKET):
115117
gcs.mkdir(TEST_BUCKET)
116-
except Exception:
117-
pass
118+
buckets_to_delete.add(TEST_BUCKET)
119+
else:
120+
try:
121+
gcs.rm(gcs.find(TEST_BUCKET))
122+
except Exception as e:
123+
logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
118124

119125
if populate:
120126
gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
121127
gcs.invalidate_cache()
122128
yield gcs
123129
finally:
124-
try:
125-
gcs.rm(gcs.find(TEST_BUCKET))
126-
gcs.rm(TEST_BUCKET)
127-
except: # noqa: E722
128-
pass
130+
_cleanup_gcs(gcs)
129131

130132

131-
def _cleanup_gcs(gcs, is_real_gcs):
132-
"""Only remove the bucket/contents if we are NOT using the real GCS, logging a warning on failure."""
133-
if is_real_gcs:
134-
return
133+
def _cleanup_gcs(gcs):
134+
"""Clean the bucket contents, logging a warning on failure."""
135135
try:
136-
gcs.rm(TEST_BUCKET, recursive=True)
136+
gcs.rm(gcs.find(TEST_BUCKET))
137137
except Exception as e:
138138
logging.warning(f"Failed to clean up GCS bucket {TEST_BUCKET}: {e}")
139139

140140

141+
@pytest.fixture(scope="session", autouse=True)
142+
def final_cleanup(gcs_factory, buckets_to_delete):
143+
"""A session-scoped fixture to delete the test buckets after all tests are run."""
144+
yield
145+
# This code runs after the entire test session finishes
146+
use_extended_gcs = os.getenv(
147+
"GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false"
148+
).lower() in (
149+
"true",
150+
"1",
151+
)
152+
153+
if use_extended_gcs:
154+
is_real_gcs = (
155+
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
156+
)
157+
mock_authentication_manager = (
158+
patch("google.auth.default", return_value=(None, "fake-project"))
159+
if not is_real_gcs
160+
else nullcontext()
161+
)
162+
else:
163+
mock_authentication_manager = nullcontext()
164+
165+
with mock_authentication_manager:
166+
gcs = gcs_factory()
167+
for bucket in buckets_to_delete:
168+
# For real GCS, only delete if created by the test suite.
169+
# For emulators, always delete.
170+
try:
171+
if gcs.exists(bucket):
172+
gcs.rm(bucket, recursive=True)
173+
logging.info(f"Cleaned up bucket: {bucket}")
174+
except Exception as e:
175+
logging.warning(
176+
f"Failed to perform final cleanup for bucket {bucket}: {e}"
177+
)
178+
179+
141180
@pytest.fixture
142-
def extended_gcsfs(gcs_factory, populate=True):
181+
def extended_gcsfs(gcs_factory, buckets_to_delete, populate=True):
143182
# Check if we are running against a real GCS endpoint
144183
is_real_gcs = (
145184
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
@@ -159,52 +198,62 @@ def extended_gcsfs(gcs_factory, populate=True):
159198
# Only create/delete/populate the bucket if we are NOT using the real GCS endpoint
160199
if not is_real_gcs:
161200
try:
162-
extended_gcsfs.rm(TEST_BUCKET, recursive=True)
201+
extended_gcsfs.rm(TEST_ZONAL_BUCKET, recursive=True)
163202
except FileNotFoundError:
164203
pass
165-
extended_gcsfs.mkdir(TEST_BUCKET)
204+
extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
205+
buckets_to_delete.add(TEST_ZONAL_BUCKET)
166206
if populate:
167207
extended_gcsfs.pipe(
168-
{TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}
208+
{TEST_ZONAL_BUCKET + "/" + k: v for k, v in allfiles.items()}
169209
)
170210
extended_gcsfs.invalidate_cache()
171211
yield extended_gcsfs
172212
finally:
173-
_cleanup_gcs(extended_gcsfs, is_real_gcs)
213+
_cleanup_gcs(extended_gcsfs)
174214

175215

176216
@pytest.fixture
177-
def gcs_versioned(gcs_factory):
217+
def gcs_versioned(gcs_factory, buckets_to_delete):
178218
gcs = gcs_factory()
179219
gcs.version_aware = True
180220
is_real_gcs = (
181221
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
182222
)
183223
try: # ensure we're empty.
224+
# The versioned bucket might be created by `is_versioning_enabled`
225+
# in test_core_versioned.py. We must register it for cleanup only if
226+
# it was created by this test run.
227+
try:
228+
from gcsfs.tests.test_core_versioned import (
229+
_VERSIONED_BUCKET_CREATED_BY_TESTS,
230+
)
231+
232+
if _VERSIONED_BUCKET_CREATED_BY_TESTS:
233+
buckets_to_delete.add(TEST_VERSIONED_BUCKET)
234+
except ImportError:
235+
pass # test_core_versioned is not being run
184236
if is_real_gcs:
185-
# For real GCS, we assume the bucket exists and only clean its contents.
186-
try:
187-
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
188-
except Exception as e:
189-
logging.warning(
190-
f"Failed to empty versioned bucket {TEST_VERSIONED_BUCKET}: {e}"
191-
)
237+
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
192238
else:
193-
# For emulators, we delete and recreate the bucket for a clean state.
239+
# For emulators, we delete and recreate the bucket for a clean state
194240
try:
195241
gcs.rm(TEST_VERSIONED_BUCKET, recursive=True)
196242
except FileNotFoundError:
197243
pass
198244
gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
245+
buckets_to_delete.add(TEST_VERSIONED_BUCKET)
199246
gcs.invalidate_cache()
200247
yield gcs
201248
finally:
249+
# Ensure the bucket is empty after the test.
202250
try:
203-
if not is_real_gcs:
204-
gcs.rm(gcs.find(TEST_VERSIONED_BUCKET, versions=True))
205-
gcs.rm(TEST_VERSIONED_BUCKET)
206-
except: # noqa: E722
207-
pass
251+
if is_real_gcs:
252+
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
253+
except Exception as e:
254+
logging.warning(
255+
f"Failed to clean up versioned bucket {TEST_VERSIONED_BUCKET} after test: {e}"
256+
)
208257

209258

210259
def cleanup_versioned_bucket(gcs, bucket_name, prefix=None):

gcsfs/tests/derived/gcsfs_fixtures.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,35 @@
1+
import logging
2+
13
import fsspec
24
import pytest
35
from fsspec.tests.abstract import AbstractFixtures
46

57
from gcsfs.core import GCSFileSystem
6-
from gcsfs.tests.conftest import allfiles
8+
from gcsfs.tests.conftest import _cleanup_gcs, allfiles
79
from gcsfs.tests.settings import TEST_BUCKET
810

911

1012
class GcsfsFixtures(AbstractFixtures):
1113
@pytest.fixture(scope="class")
12-
def fs(self, docker_gcs):
14+
def fs(self, docker_gcs, buckets_to_delete):
1315
GCSFileSystem.clear_instance_cache()
1416
gcs = fsspec.filesystem("gcs", endpoint_url=docker_gcs)
15-
try:
16-
# ensure we're empty.
17-
try:
18-
gcs.rm(TEST_BUCKET, recursive=True)
19-
except FileNotFoundError:
20-
pass
21-
try:
17+
try: # ensure we're empty.
18+
# Create the bucket if it doesn't exist, otherwise clean it.
19+
if not gcs.exists(TEST_BUCKET):
20+
buckets_to_delete.add(TEST_BUCKET)
2221
gcs.mkdir(TEST_BUCKET)
23-
except Exception:
24-
pass
22+
else:
23+
try:
24+
gcs.rm(gcs.find(TEST_BUCKET))
25+
except Exception as e:
26+
logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
2527

2628
gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
2729
gcs.invalidate_cache()
2830
yield gcs
2931
finally:
30-
try:
31-
gcs.rm(gcs.find(TEST_BUCKET))
32-
gcs.rm(TEST_BUCKET)
33-
except: # noqa: E722
34-
pass
32+
_cleanup_gcs(gcs)
3533

3634
@pytest.fixture
3735
def fs_path(self):

gcsfs/tests/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test")
44
TEST_VERSIONED_BUCKET = os.getenv("GCSFS_TEST_VERSIONED_BUCKET", "gcsfs_test_versioned")
5+
TEST_ZONAL_BUCKET = os.getenv("GCSFS_ZONAL_TEST_BUCKET", "gcsfs_zonal_test")
56
TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project")
67
TEST_REQUESTER_PAYS_BUCKET = f"{TEST_BUCKET}_req_pay"
78
TEST_KMS_KEY = os.getenv(

gcsfs/tests/test_core_versioned.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23
import posixpath
34

@@ -10,17 +11,28 @@
1011
a = TEST_VERSIONED_BUCKET + "/tmp/test/a"
1112
b = TEST_VERSIONED_BUCKET + "/tmp/test/b"
1213

14+
# Flag to track if the bucket was created by this test run.
15+
_VERSIONED_BUCKET_CREATED_BY_TESTS = False
16+
1317

1418
def is_versioning_enabled():
1519
"""
1620
Helper function to check if the test bucket has versioning enabled.
1721
Returns a tuple of (bool, reason_string).
1822
"""
1923
# Don't skip when using an emulator, as we create the versioned bucket ourselves.
24+
global _VERSIONED_BUCKET_CREATED_BY_TESTS
2025
if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com":
2126
return True, ""
2227
try:
2328
gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project"))
29+
if not gcs.exists(TEST_VERSIONED_BUCKET):
30+
logging.info(
31+
f"Creating versioned bucket for tests: {TEST_VERSIONED_BUCKET}"
32+
)
33+
gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
34+
_VERSIONED_BUCKET_CREATED_BY_TESTS = True
35+
2436
client = storage.Client(
2537
credentials=gcs.credentials.credentials, project=gcs.project
2638
)

gcsfs/tests/test_extended_gcsfs.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,21 @@
1111
from google.cloud.storage.exceptions import DataCorruption
1212

1313
from gcsfs.extended_gcsfs import BucketType
14-
from gcsfs.tests.conftest import a, b, c, csv_files, files, text_files
15-
from gcsfs.tests.settings import TEST_BUCKET
14+
from gcsfs.tests.conftest import csv_files, files, text_files
15+
from gcsfs.tests.settings import TEST_ZONAL_BUCKET
1616

1717
file = "test/accounts.1.json"
18-
file_path = f"{TEST_BUCKET}/{file}"
18+
file_path = f"{TEST_ZONAL_BUCKET}/{file}"
1919
json_data = files[file]
2020
lines = io.BytesIO(json_data).readlines()
2121
file_size = len(json_data)
2222

2323
REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"
2424

25+
a = TEST_ZONAL_BUCKET + "/tmp/test/a"
26+
b = TEST_ZONAL_BUCKET + "/tmp/test/b"
27+
c = TEST_ZONAL_BUCKET + "/tmp/test/c"
28+
2529
# If the condition is True, only then tests in this file are run.
2630
should_run = os.getenv(REQUIRED_ENV_VAR, "false").lower() in (
2731
"true",
@@ -131,7 +135,7 @@ def test_read_block_zb(extended_gcsfs, zonal_mocks, subtests):
131135
assert result == expected_data
132136
if mocks:
133137
mocks["sync_lookup_bucket_type"].assert_called_once_with(
134-
TEST_BUCKET
138+
TEST_ZONAL_BUCKET
135139
)
136140
if expected_data:
137141
mocks["downloader"].download_ranges.assert_called_with(
@@ -143,7 +147,7 @@ def test_read_block_zb(extended_gcsfs, zonal_mocks, subtests):
143147

144148
def test_read_small_zb(extended_gcsfs, zonal_mocks):
145149
csv_file = "2014-01-01.csv"
146-
csv_file_path = f"{TEST_BUCKET}/{csv_file}"
150+
csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}"
147151
csv_data = csv_files[csv_file]
148152

149153
with zonal_mocks(csv_data) as mocks:
@@ -160,7 +164,9 @@ def test_read_small_zb(extended_gcsfs, zonal_mocks):
160164
# cache drop
161165
assert len(f.cache.cache) < len(out)
162166
if mocks:
163-
mocks["sync_lookup_bucket_type"].assert_called_once_with(TEST_BUCKET)
167+
mocks["sync_lookup_bucket_type"].assert_called_once_with(
168+
TEST_ZONAL_BUCKET
169+
)
164170

165171

166172
def test_readline_zb(extended_gcsfs, zonal_mocks):
@@ -169,7 +175,7 @@ def test_readline_zb(extended_gcsfs, zonal_mocks):
169175
)
170176
for k, data in all_items:
171177
with zonal_mocks(data):
172-
with extended_gcsfs.open("/".join([TEST_BUCKET, k]), "rb") as f:
178+
with extended_gcsfs.open("/".join([TEST_ZONAL_BUCKET, k]), "rb") as f:
173179
result = f.readline()
174180
expected = data.split(b"\n")[0] + (b"\n" if data.count(b"\n") else b"")
175181
assert result == expected

0 commit comments

Comments
 (0)