Skip to content

Commit 81fb615

Browse files
Separate versioned and non-versioned tests to use different bucket
1 parent 13fe69d commit 81fb615

File tree

5 files changed

+174
-81
lines changed

5 files changed

+174
-81
lines changed

docs/source/developer.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ real GCS. A small number of tests run differently or are skipped.
1818

1919
If you want to actually test against real GCS, then you should set
2020
STORAGE_EMULATOR_HOST to "https://storage.googleapis.com" and also
21-
provide appropriate GCSFS_TEST_BUCKET and GCSFS_TEST_PROJECT, as well
22-
as setting your default google credentials (or providing them via the
23-
fsspec config).
21+
provide appropriate GCSFS_TEST_BUCKET, GCSFS_TEST_VERSIONED_BUCKET
22+
(To use for tests that target GCS object versioning, this bucket must have object versioning enabled)
23+
and GCSFS_TEST_PROJECT, as well as setting your default google
24+
credentials (or providing them via the fsspec config).
2425

2526
.. _fake-gcs-server: https://github.com/fsouza/fake-gcs-server
2627

gcsfs/tests/conftest.py

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
import fsspec
1010
import pytest
1111
import requests
12+
from google.cloud import storage
1213

1314
from gcsfs import GCSFileSystem
14-
from gcsfs.tests.settings import TEST_BUCKET
15+
from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET
1516

1617
files = {
1718
"test/accounts.1.json": (
@@ -176,21 +177,68 @@ def extended_gcsfs(gcs_factory, populate=True):
176177
def gcs_versioned(gcs_factory):
177178
gcs = gcs_factory()
178179
gcs.version_aware = True
179-
try:
180-
try:
181-
gcs.rm(gcs.find(TEST_BUCKET, versions=True))
182-
except FileNotFoundError:
183-
pass
184-
185-
try:
186-
gcs.mkdir(TEST_BUCKET, enable_versioning=True)
187-
except Exception:
188-
pass
180+
is_real_gcs = (
181+
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
182+
)
183+
try: # ensure we're empty.
184+
if is_real_gcs:
185+
# For real GCS, we assume the bucket exists and only clean its contents.
186+
try:
187+
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
188+
except Exception as e:
189+
logging.warning(
190+
f"Failed to empty versioned bucket {TEST_VERSIONED_BUCKET}: {e}"
191+
)
192+
else:
193+
# For emulators, we delete and recreate the bucket for a clean state.
194+
try:
195+
gcs.rm(TEST_VERSIONED_BUCKET, recursive=True)
196+
except FileNotFoundError:
197+
pass
198+
gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
189199
gcs.invalidate_cache()
190200
yield gcs
191201
finally:
192202
try:
193-
gcs.rm(gcs.find(TEST_BUCKET, versions=True))
194-
gcs.rm(TEST_BUCKET)
203+
if not is_real_gcs:
204+
gcs.rm(gcs.find(TEST_VERSIONED_BUCKET, versions=True))
205+
gcs.rm(TEST_VERSIONED_BUCKET)
195206
except: # noqa: E722
196207
pass
208+
209+
210+
def cleanup_versioned_bucket(gcs, bucket_name, prefix=None):
211+
"""
212+
Deletes all object versions in a bucket using the google-cloud-storage client,
213+
ensuring it uses the same credentials as the gcsfs instance.
214+
"""
215+
# Define a retry policy for API calls to handle rate limiting.
216+
# This can retry on 429 Too Many Requests errors, which can happen
217+
# when deleting many object versions quickly.
218+
from google.api_core.retry import Retry
219+
220+
retry_policy = Retry(
221+
initial=1.0, # Initial delay in seconds
222+
maximum=30.0, # Maximum delay in seconds
223+
multiplier=1.2, # Backoff factor
224+
)
225+
226+
client = storage.Client(
227+
credentials=gcs.credentials.credentials, project=gcs.project
228+
)
229+
230+
# List all blobs, including old versions
231+
blobs_to_delete = list(client.list_blobs(bucket_name, versions=True, prefix=prefix))
232+
233+
if not blobs_to_delete:
234+
logging.info("No object versions to delete in %s.", bucket_name)
235+
return
236+
237+
logging.info(
238+
"Deleting %d object versions from %s.", len(blobs_to_delete), bucket_name
239+
)
240+
time.sleep(2)
241+
for blob in blobs_to_delete:
242+
blob.delete(retry=retry_policy)
243+
244+
logging.info("Successfully deleted %d object versions.", len(blobs_to_delete))

gcsfs/tests/settings.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
22

33
TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test")
4+
TEST_VERSIONED_BUCKET = os.getenv("GCSFS_TEST_VERSIONED_BUCKET", "gcsfs_test_versioned")
45
TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project")
5-
TEST_REQUESTER_PAYS_BUCKET = "gcsfs_test_req_pay"
6+
TEST_REQUESTER_PAYS_BUCKET = f"{TEST_BUCKET}_req_pay"
67
TEST_KMS_KEY = os.getenv(
78
"GCSFS_TEST_KMS_KEY",
89
f"projects/{TEST_PROJECT}/locations/us/keyRings/gcsfs_test/cryptKeys/gcsfs_test_key",

gcsfs/tests/test_core.py

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,70 +1392,6 @@ def test_deep_find_wthdirs(gcs):
13921392
]
13931393

13941394

1395-
def test_info_versioned(gcs_versioned):
1396-
with gcs_versioned.open(a, "wb") as wo:
1397-
wo.write(b"v1")
1398-
v1 = gcs_versioned.info(a)["generation"]
1399-
assert v1 is not None
1400-
with gcs_versioned.open(a, "wb") as wo:
1401-
wo.write(b"v2")
1402-
v2 = gcs_versioned.info(a)["generation"]
1403-
assert v2 is not None and v1 != v2
1404-
assert gcs_versioned.info(f"{a}#{v1}")["generation"] == v1
1405-
assert gcs_versioned.info(f"{a}?generation={v2}")["generation"] == v2
1406-
1407-
1408-
def test_cat_versioned(gcs_versioned):
1409-
with gcs_versioned.open(a, "wb") as wo:
1410-
wo.write(b"v1")
1411-
v1 = gcs_versioned.info(a)["generation"]
1412-
assert v1 is not None
1413-
with gcs_versioned.open(a, "wb") as wo:
1414-
wo.write(b"v2")
1415-
gcs_versioned.cat(f"{a}#{v1}") == b"v1"
1416-
1417-
1418-
def test_cp_versioned(gcs_versioned):
1419-
with gcs_versioned.open(a, "wb") as wo:
1420-
wo.write(b"v1")
1421-
v1 = gcs_versioned.info(a)["generation"]
1422-
assert v1 is not None
1423-
with gcs_versioned.open(a, "wb") as wo:
1424-
wo.write(b"v2")
1425-
gcs_versioned.cp_file(f"{a}#{v1}", b)
1426-
assert gcs_versioned.cat(b) == b"v1"
1427-
1428-
1429-
def test_ls_versioned(gcs_versioned):
1430-
import posixpath
1431-
1432-
with gcs_versioned.open(a, "wb") as wo:
1433-
wo.write(b"v1")
1434-
v1 = gcs_versioned.info(a)["generation"]
1435-
with gcs_versioned.open(a, "wb") as wo:
1436-
wo.write(b"v2")
1437-
v2 = gcs_versioned.info(a)["generation"]
1438-
dpath = posixpath.dirname(a)
1439-
versions = {f"{a}#{v1}", f"{a}#{v2}"}
1440-
assert versions == set(gcs_versioned.ls(dpath, versions=True))
1441-
assert versions == {
1442-
entry["name"] for entry in gcs_versioned.ls(dpath, detail=True, versions=True)
1443-
}
1444-
assert gcs_versioned.ls(TEST_BUCKET, versions=True) == ["gcsfs_test/tmp"]
1445-
1446-
1447-
def test_find_versioned(gcs_versioned):
1448-
with gcs_versioned.open(a, "wb") as wo:
1449-
wo.write(b"v1")
1450-
v1 = gcs_versioned.info(a)["generation"]
1451-
with gcs_versioned.open(a, "wb") as wo:
1452-
wo.write(b"v2")
1453-
v2 = gcs_versioned.info(a)["generation"]
1454-
versions = {f"{a}#{v1}", f"{a}#{v2}"}
1455-
assert versions == set(gcs_versioned.find(a, versions=True))
1456-
assert versions == set(gcs_versioned.find(a, detail=True, versions=True))
1457-
1458-
14591395
def test_cp_directory_recursive(gcs):
14601396
src = TEST_BUCKET + "/src"
14611397
src_file = src + "/file"

gcsfs/tests/test_core_versioned.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import os
2+
import posixpath
3+
4+
import pytest
5+
from google.cloud import storage
6+
7+
from gcsfs import GCSFileSystem
8+
from gcsfs.tests.settings import TEST_VERSIONED_BUCKET
9+
10+
a = TEST_VERSIONED_BUCKET + "/tmp/test/a"
11+
b = TEST_VERSIONED_BUCKET + "/tmp/test/b"
12+
13+
14+
def is_versioning_enabled():
15+
"""
16+
Helper function to check if the test bucket has versioning enabled.
17+
Returns a tuple of (bool, reason_string).
18+
"""
19+
# Don't skip when using an emulator, as we create the versioned bucket ourselves.
20+
if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com":
21+
return True, ""
22+
try:
23+
gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project"))
24+
client = storage.Client(
25+
credentials=gcs.credentials.credentials, project=gcs.project
26+
)
27+
bucket = client.get_bucket(TEST_VERSIONED_BUCKET)
28+
if bucket.versioning_enabled:
29+
return True, ""
30+
return (
31+
False,
32+
f"Bucket '{TEST_VERSIONED_BUCKET}' does not have versioning enabled.",
33+
)
34+
except Exception as e:
35+
return (
36+
False,
37+
f"Could not verify versioning status for bucket '{TEST_VERSIONED_BUCKET}': {e}",
38+
)
39+
40+
41+
pytestmark = pytest.mark.skipif(
42+
not is_versioning_enabled()[0], reason=is_versioning_enabled()[1]
43+
)
44+
45+
46+
def test_info_versioned(gcs_versioned):
47+
with gcs_versioned.open(a, "wb") as wo:
48+
wo.write(b"v1")
49+
v1 = gcs_versioned.info(a)["generation"]
50+
assert v1 is not None
51+
with gcs_versioned.open(a, "wb") as wo:
52+
wo.write(b"v2")
53+
v2 = gcs_versioned.info(a)["generation"]
54+
assert v2 is not None and v1 != v2
55+
assert gcs_versioned.info(f"{a}#{v1}")["generation"] == v1
56+
assert gcs_versioned.info(f"{a}?generation={v2}")["generation"] == v2
57+
58+
59+
def test_cat_versioned(gcs_versioned):
60+
with gcs_versioned.open(b, "wb") as wo:
61+
wo.write(b"v1")
62+
v1 = gcs_versioned.info(b)["generation"]
63+
assert v1 is not None
64+
with gcs_versioned.open(b, "wb") as wo:
65+
wo.write(b"v2")
66+
assert gcs_versioned.cat(f"{b}#{v1}") == b"v1"
67+
68+
69+
def test_cp_versioned(gcs_versioned):
70+
with gcs_versioned.open(a, "wb") as wo:
71+
wo.write(b"v1")
72+
v1 = gcs_versioned.info(a)["generation"]
73+
assert v1 is not None
74+
with gcs_versioned.open(a, "wb") as wo:
75+
wo.write(b"v2")
76+
gcs_versioned.cp_file(f"{a}#{v1}", b)
77+
assert gcs_versioned.cat(b) == b"v1"
78+
79+
80+
def test_ls_versioned(gcs_versioned):
81+
with gcs_versioned.open(b, "wb") as wo:
82+
wo.write(b"v1")
83+
v1 = gcs_versioned.info(b)["generation"]
84+
with gcs_versioned.open(b, "wb") as wo:
85+
wo.write(b"v2")
86+
v2 = gcs_versioned.info(b)["generation"]
87+
dpath = posixpath.dirname(b)
88+
versions = {f"{b}#{v1}", f"{b}#{v2}"}
89+
assert versions == set(gcs_versioned.ls(dpath, versions=True))
90+
assert versions == {
91+
entry["name"] for entry in gcs_versioned.ls(dpath, detail=True, versions=True)
92+
}
93+
assert gcs_versioned.ls(TEST_VERSIONED_BUCKET, versions=True) == [
94+
f"{TEST_VERSIONED_BUCKET}/tmp"
95+
]
96+
97+
98+
def test_find_versioned(gcs_versioned):
99+
with gcs_versioned.open(a, "wb") as wo:
100+
wo.write(b"v1")
101+
v1 = gcs_versioned.info(a)["generation"]
102+
with gcs_versioned.open(a, "wb") as wo:
103+
wo.write(b"v2")
104+
v2 = gcs_versioned.info(a)["generation"]
105+
versions = {f"{a}#{v1}", f"{a}#{v2}"}
106+
assert versions == set(gcs_versioned.find(a, versions=True))
107+
assert versions == set(gcs_versioned.find(a, detail=True, versions=True))

0 commit comments

Comments
 (0)