Skip to content

Commit d1bb2cc

Browse files
support creation of zonal buckets using mkdir (#769)
* support creation of zonal buckets using mkdir
* pass location to super mkdir() only if it is not None
* pass the correct kwargs to the fallback mkdir
* add coverage for passing location parameter
1 parent 68c9fd0 commit d1bb2cc

File tree

3 files changed

+165
-33
lines changed

3 files changed

+165
-33
lines changed

gcsfs/extended_gcsfs.py

Lines changed: 85 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -560,56 +560,108 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
560560
raise
561561

562562
async def _mkdir(
563-
self, path, create_parents=False, enable_hierarchical_namespace=False, **kwargs
563+
self,
564+
path,
565+
create_parents=False,
566+
enable_hierarchical_namespace=False,
567+
placement=None,
568+
location=None,
569+
**kwargs,
564570
):
565571
"""
566-
If the path does not contain an object key, a new bucket is created.
567-
If `enable_hierarchical_namespace` is True, the bucket will have Hierarchical Namespace enabled.
568-
569-
For HNS-enabled buckets, this method creates a folder object. If
570-
`create_parents` is True, any missing parent folders are also created.
571-
572-
If bucket doesn't exist, enable_hierarchical_namespace and create_parents are set to True
573-
and the path includes a key then HNS-enabled bucket will be created
574-
and also the folders within that bucket.
572+
Create a directory or bucket.
575573
576-
If `create_parents` is False and a parent does not exist, a
577-
FileNotFoundError is raised.
574+
If the path refers to a bucket (no object key), a new bucket is created.
575+
If the path refers to a directory (includes object key), a directory is created.
578576
579-
For non-HNS buckets, it falls back to the parent implementation which
580-
may involve creating a bucket or doing nothing (as GCS has no true empty directories).
577+
Parameters
578+
----------
579+
path : str
580+
Path to create.
581+
create_parents : bool
582+
If True, create parent directories if they do not exist.
583+
If the path includes a bucket that does not exist, the bucket will also be created.
584+
enable_hierarchical_namespace : bool
585+
If True, and a bucket is being created, the bucket will have Hierarchical
586+
Namespace (HNS) enabled.
587+
placement : str, optional
588+
If set to a zone (e.g. "us-central1-a"), a Zonal bucket is created.
589+
Zonal buckets are HNS-enabled by default.
590+
When creating a Zonal bucket, `location` must be passed as a
591+
region (e.g. "us-central1"). If `location` is not specified, it defaults
592+
to `self.default_location`. The zone specified in `placement` must belong
593+
to the region specified in `location`.
594+
location : str, optional
595+
Location where buckets are created, like 'US' or 'EUROPE-WEST3'.
596+
If not provided, defaults to `self.default_location`.
597+
**kwargs : dict
598+
Additional arguments passed to the bucket creation API.
599+
600+
Notes
601+
-----
602+
- For HNS-enabled buckets (including Zonal buckets), this method creates a
603+
native folder object.
604+
- If `create_parents` is False and a parent directory does not exist in an
605+
HNS/Zonal bucket, a FileNotFoundError is raised.
606+
- For non-HNS buckets, this falls back to the parent implementation. Since
607+
standard GCS has no true directories, `mkdir` on a path with a key is
608+
typically a no-op unless `create_parents=True` triggers bucket creation.
581609
"""
582610
path = self._strip_protocol(path)
583-
if enable_hierarchical_namespace:
584-
kwargs["hierarchicalNamespace"] = {"enabled": True}
611+
bucket, key, _ = self.split_path(path)
612+
613+
# Determine if we are requesting creation of a Zonal or HNS bucket
614+
should_create_zonal_bucket = placement is not None
615+
should_create_hns_bucket = (
616+
enable_hierarchical_namespace or should_create_zonal_bucket
617+
)
618+
619+
# Prepare arguments for bucket creation
620+
bucket_kwargs = kwargs.copy()
621+
if location:
622+
bucket_kwargs["location"] = location
623+
if should_create_zonal_bucket:
624+
bucket_kwargs["customPlacementConfig"] = {"dataLocations": [placement]}
625+
bucket_kwargs["storageClass"] = "RAPID"
626+
627+
if should_create_hns_bucket:
628+
bucket_kwargs["hierarchicalNamespace"] = {"enabled": True}
585629
# HNS buckets require uniform bucket-level access.
586-
kwargs["iamConfiguration"] = {"uniformBucketLevelAccess": {"enabled": True}}
630+
bucket_kwargs["iamConfiguration"] = {
631+
"uniformBucketLevelAccess": {"enabled": True}
632+
}
587633
# When uniformBucketLevelAccess is enabled, ACLs cannot be used.
588634
# We must explicitly set them to None to prevent the parent
589635
# method from using default ACLs.
590-
kwargs["acl"] = None
591-
kwargs["default_acl"] = None
636+
bucket_kwargs["acl"] = None
637+
bucket_kwargs["default_acl"] = None
592638

593-
bucket, key, _ = self.split_path(path)
594-
# If the key is empty, the path refers to a bucket, not an object.
595-
# Defer to the parent method to handle bucket creation.
639+
# Case 1: Path is just a bucket
596640
if not key:
597-
return await super()._mkdir(path, create_parents=create_parents, **kwargs)
641+
return await super()._mkdir(
642+
path, create_parents=create_parents, **bucket_kwargs
643+
)
598644

599-
is_hns = False
600-
# If creating an HNS bucket, check for its existence first.
601-
if create_parents and enable_hierarchical_namespace:
645+
# Case 2: Path is a folder
646+
is_hns_bucket = False
647+
648+
# If creating parents and HNS/Zonal requested, ensure bucket exists with correct config
649+
if create_parents and should_create_hns_bucket:
602650
if not await self._exists(bucket):
603-
await super()._mkdir(bucket, create_parents=True, **kwargs)
604-
is_hns = True # Skip HNS check since we just created it.
651+
await super()._mkdir(bucket, create_parents=True, **bucket_kwargs)
652+
is_hns_bucket = True
605653

606-
if not is_hns:
607-
# If the bucket was not created above, we need to check its type.
608-
is_hns = await self._is_bucket_hns_enabled(bucket)
654+
if not is_hns_bucket:
655+
is_hns_bucket = await self._is_bucket_hns_enabled(bucket)
609656

610-
if not is_hns:
611-
return await super()._mkdir(path, create_parents=create_parents, **kwargs)
657+
if is_hns_bucket:
658+
return await self._create_hns_folder(path, bucket, key, create_parents)
659+
660+
return await super()._mkdir(
661+
path, create_parents=create_parents, **bucket_kwargs
662+
)
612663

664+
async def _create_hns_folder(self, path, bucket, key, create_parents):
613665
logger.debug(f"Using HNS-aware mkdir for '{path}'.")
614666
parent = f"projects/_/buckets/{bucket}"
615667
folder_id = key.rstrip("/")

gcsfs/tests/integration/test_extended_hns.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,30 @@ def test_mkdir_hns_bucket_with_create_parents_succeeds(
397397

398398
assert gcsfs._sync_lookup_bucket_type(bucket_name) is BucketType.HIERARCHICAL
399399

400+
def test_mkdir_for_zonal_bucket(self, gcs_hns, buckets_to_delete):
401+
"""Test creating a Zonal bucket using the placement parameter (string)."""
402+
gcsfs = gcs_hns
403+
bucket_name = f"gcsfs-zonal-bucket-{uuid.uuid4()}"
404+
dir_path = f"{bucket_name}/some_dir"
405+
buckets_to_delete.add(bucket_name)
406+
407+
# Use a valid zone for placement
408+
placement = "us-central1-a"
409+
location = "us-central1"
410+
411+
assert not gcsfs.exists(bucket_name)
412+
gcsfs.mkdir(
413+
dir_path, create_parents=True, placement=placement, location=location
414+
)
415+
assert gcsfs.exists(bucket_name)
416+
assert gcsfs.exists(dir_path)
417+
assert (
418+
gcsfs._sync_lookup_bucket_type(bucket_name) == BucketType.ZONAL_HIERARCHICAL
419+
)
420+
421+
info = gcsfs.info(bucket_name)
422+
assert info["location"] == location.upper()
423+
400424
def test_mkdir_create_non_hns_bucket(self, gcs_hns, buckets_to_delete):
401425
"""Test creating a new non-HNS bucket by default."""
402426
gcsfs = gcs_hns

gcsfs/tests/test_extended_hns_gcsfs.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,62 @@ def test_mkdir_existing_hns_folder_is_noop(self, gcs_hns, gcs_hns_mocks):
921921
mocks["super_mkdir"].assert_not_called()
922922
mocks["async_lookup_bucket_type"].assert_called_once_with(TEST_HNS_BUCKET)
923923

924+
def test_mkdir_with_placement_string(self, gcs_hns, gcs_hns_mocks):
925+
"""Test mkdir with placement as a string (Zonal bucket)."""
926+
gcsfs = gcs_hns
927+
bucket_path = "new-zonal-bucket"
928+
placement = "us-central1-a"
929+
930+
with gcs_hns_mocks(BucketType.UNKNOWN, gcsfs) as mocks:
931+
mocks["info"].side_effect = FileNotFoundError
932+
933+
gcsfs.mkdir(bucket_path, placement=placement)
934+
935+
mocks["super_mkdir"].assert_called_once()
936+
call_args = mocks["super_mkdir"].call_args
937+
assert call_args[0][0] == bucket_path
938+
assert call_args[1]["customPlacementConfig"] == {
939+
"dataLocations": [placement]
940+
}
941+
assert call_args[1]["hierarchicalNamespace"] == {"enabled": True}
942+
assert call_args[1]["storageClass"] == "RAPID"
943+
944+
def test_mkdir_with_placement_creates_bucket_and_folders(
945+
self, gcs_hns, gcs_hns_mocks
946+
):
947+
"""Test mkdir with placement creates bucket and folders."""
948+
gcsfs = gcs_hns
949+
bucket_name = f"new-zonal-bucket-{uuid.uuid4()}"
950+
dir_path = f"{bucket_name}/some/dir"
951+
placement = "us-central1-a"
952+
location = "us-central1"
953+
954+
with gcs_hns_mocks(BucketType.UNKNOWN, gcsfs) as mocks:
955+
# Simulate bucket not existing initially.
956+
mocks["info"].side_effect = FileNotFoundError
957+
958+
gcsfs.mkdir(
959+
dir_path, create_parents=True, placement=placement, location=location
960+
)
961+
962+
# Verify bucket creation via super()._mkdir
963+
mocks["super_mkdir"].assert_called_once()
964+
call_args = mocks["super_mkdir"].call_args
965+
assert call_args[0][0] == bucket_name
966+
assert call_args[1]["create_parents"] is True
967+
assert call_args[1]["customPlacementConfig"] == {
968+
"dataLocations": [placement]
969+
}
970+
assert call_args[1]["hierarchicalNamespace"] == {"enabled": True}
971+
assert call_args[1]["location"] == location
972+
assert call_args[1]["storageClass"] == "RAPID"
973+
974+
# Verify folder creation via control_client.create_folder
975+
expected_request = self._get_create_folder_request(dir_path, recursive=True)
976+
mocks["control_client"].create_folder.assert_called_once_with(
977+
request=expected_request
978+
)
979+
924980

925981
class TestExtendedGcsFileSystemFind:
926982
"""Tests for the find method in ExtendedGcsFileSystem."""

0 commit comments

Comments
 (0)