Skip to content

Commit 197d0e6

Browse files
committed
simplify get remaining properties
1 parent 902e20e commit 197d0e6

File tree

2 files changed

+32
-16
lines changed

2 files changed

+32
-16
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@
186186
from pyiceberg.utils.deprecated import deprecation_message
187187
from pyiceberg.utils.properties import (
188188
filter_properties,
189+
properties_with_prefix,
189190
property_as_bool,
190191
property_as_int,
191192
)
@@ -508,9 +509,8 @@ def _initialize_oss_fs(self) -> FileSystem:
508509
if session_name := get(S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME, "oss.session_name"):
509510
client_kwargs["session_name"] = session_name
510511

511-
remaining_oss_props = {
512-
k.removeprefix("oss."): v for k, v in self.properties.items() if k.startswith("oss.") and k not in used_keys
513-
}
512+
# get the rest of the properties with the `oss.` prefix that are not already evaluated
513+
remaining_oss_props = properties_with_prefix({k: v for k, v in self.properties.items() if k not in used_keys}, "oss.")
514514
client_kwargs = {**remaining_oss_props, **client_kwargs}
515515
return S3FileSystem(**client_kwargs)
516516

@@ -555,9 +555,8 @@ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
555555
if retry_instance := _import_retry_strategy(retry_strategy_impl):
556556
client_kwargs["retry_strategy"] = retry_instance
557557

558-
remaining_s3_props = {
559-
k.removeprefix("s3."): v for k, v in self.properties.items() if k.startswith("s3.") and k not in used_keys
560-
}
558+
# get the rest of the properties with the `s3.` prefix that are not already evaluated
559+
remaining_s3_props = properties_with_prefix({k: v for k, v in self.properties.items() if k not in used_keys}, "s3.")
561560
client_kwargs = {**remaining_s3_props, **client_kwargs}
562561
return S3FileSystem(**client_kwargs)
563562

@@ -599,9 +598,8 @@ def _initialize_azure_fs(self) -> FileSystem:
599598
if sas_token := get(ADLS_SAS_TOKEN, "adls.sas_token"):
600599
client_kwargs["sas_token"] = sas_token
601600

602-
remaining_adls_props = {
603-
k.removeprefix("adls."): v for k, v in self.properties.items() if k.startswith("adls.") and k not in used_keys
604-
}
601+
# get the rest of the properties with the `adls.` prefix that are not already evaluated
602+
remaining_adls_props = properties_with_prefix({k: v for k, v in self.properties.items() if k not in used_keys}, "adls.")
605603
client_kwargs = {**remaining_adls_props, **client_kwargs}
606604
return AzureFileSystem(**client_kwargs)
607605

@@ -626,9 +624,8 @@ def _initialize_hdfs_fs(self, scheme: str, netloc: Optional[str]) -> FileSystem:
626624
if kerb_ticket := get(HDFS_KERB_TICKET, "hdfs.kerb_ticket"):
627625
client_kwargs["kerb_ticket"] = kerb_ticket
628626

629-
remaining_hdfs_props = {
630-
k.removeprefix("hdfs."): v for k, v in self.properties.items() if k.startswith("hdfs.") and k not in used_keys
631-
}
627+
# get the rest of the properties with the `hdfs.` prefix that are not already evaluated
628+
remaining_hdfs_props = properties_with_prefix({k: v for k, v in self.properties.items() if k not in used_keys}, "hdfs.")
632629
client_kwargs = {**remaining_hdfs_props, **client_kwargs}
633630
return HadoopFileSystem(**client_kwargs)
634631

@@ -660,14 +657,13 @@ def _initialize_gcs_fs(self) -> FileSystem:
660657
if project_id := get(GCS_PROJECT_ID, "gcs.project_id"):
661658
client_kwargs["project_id"] = project_id
662659

663-
remaining_gcs_props = {
664-
k.removeprefix("gcs."): v for k, v in self.properties.items() if k.startswith("gcs.") and k not in used_keys
665-
}
660+
# get the rest of the properties with the `gcs.` prefix that are not already evaluated
661+
remaining_gcs_props = properties_with_prefix({k: v for k, v in self.properties.items() if k not in used_keys}, "gcs.")
666662
client_kwargs = {**remaining_gcs_props, **client_kwargs}
667663
return GcsFileSystem(**client_kwargs)
668664

669665
def _initialize_local_fs(self) -> FileSystem:
670-
client_kwargs = {k.removeprefix("file."): v for k, v in self.properties.items() if k.startswith("file.")}
666+
client_kwargs = properties_with_prefix(self.properties, "file.")
671667
return PyArrowLocalFileSystem(**client_kwargs)
672668

673669
def new_input(self, location: str) -> PyArrowFile:

pyiceberg/utils/properties.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,26 @@ def get_header_properties(
8686
return {key[header_prefix_len:]: value for key, value in properties.items() if key.startswith(HEADER_PREFIX)}
8787

8888

89+
def properties_with_prefix(
90+
properties: Properties,
91+
prefix: str,
92+
) -> Properties:
93+
"""
94+
Return subset of provided map with keys matching the provided prefix. Matching is case-sensitive and the matching prefix is removed from the keys in returned map.
95+
96+
Args:
97+
properties: input map
98+
prefix: prefix to choose keys from input map
99+
100+
Returns:
101+
subset of input map with keys starting with provided prefix and prefix trimmed out
102+
"""
103+
if not properties:
104+
return {}
105+
106+
return {key[len(prefix) :]: value for key, value in properties.items() if key.startswith(prefix)}
107+
108+
89109
def filter_properties(
90110
properties: Properties,
91111
key_predicate: Callable[[str], bool],

0 commit comments

Comments
 (0)