Skip to content

Commit 807c554

Browse files
fix(rest): skip Hadoop-only vended storage credentials during resolution
1 parent 3bd5f27 commit 807c554

2 files changed

Lines changed: 80 additions & 4 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,11 @@ class ListViewsResponse(IcebergBaseModel):
392392
_PLANNING_RESPONSE_ADAPTER = TypeAdapter(PlanningResponse)
393393

394394

395+
def _is_hadoop_only_config(config: Properties) -> bool:
396+
"""Return True if every key is a Hadoop ``fs.*`` key — pyiceberg has no HadoopFileIO to consume them."""
397+
return bool(config) and all(k.startswith("fs.") for k in config)
398+
399+
395400
class RestCatalog(Catalog):
396401
uri: str
397402
_session: Session
@@ -458,22 +463,32 @@ def _create_session(self) -> Session:
458463

459464
@staticmethod
460465
def _resolve_storage_credentials(storage_credentials: list[StorageCredential], location: str | None) -> Properties:
461-
"""Resolve the best-matching storage credential by longest prefix match.
466+
"""Pick the longest-prefix storage credential for ``location``.
462467
463-
Mirrors the Java implementation in S3FileIO.clientForStoragePath() which iterates
464-
over storage credential prefixes and selects the one with the longest match.
468+
Mirrors Java ``S3FileIO.clientForStoragePath``. Hadoop-only (``fs.*``)
469+
credentials are filtered out since pyiceberg has no HadoopFileIO to
470+
consume them — otherwise a catalog vending both ``fs.*`` and ``s3.*``
471+
bundles per location could strand the FileIO with unusable keys.
465472
466473
See: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
467474
"""
468475
if not storage_credentials or not location:
469476
return {}
470477

478+
consumable = [c for c in storage_credentials if not _is_hadoop_only_config(c.config)]
479+
471480
best_match: StorageCredential | None = None
472-
for cred in storage_credentials:
481+
for cred in consumable:
473482
if location.startswith(cred.prefix):
474483
if best_match is None or len(cred.prefix) > len(best_match.prefix):
475484
best_match = cred
476485

486+
# Java S3FileIO falls back to the "s3" ROOT_PREFIX credential; scope it to
487+
# schemes pyarrow's S3FileSystem handles so non-S3 schemes (gs://, abfs://,
488+
# etc.) don't get handed s3.* keys.
489+
if best_match is None and location.startswith(("s3://", "s3a://", "s3n://", "oss://")):
490+
best_match = next((c for c in consumable if c.prefix == "s3"), None)
491+
477492
return best_match.config if best_match else {}
478493

479494
def _load_file_io(self, properties: Properties = EMPTY_DICT, location: str | None = None) -> FileIO:

tests/catalog/test_rest.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2845,6 +2845,67 @@ def test_resolve_storage_credentials_empty() -> None:
28452845
assert RestCatalog._resolve_storage_credentials([], None) == {}
28462846

28472847

2848+
def test_resolve_storage_credentials_skips_hadoop_only() -> None:
2849+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2850+
2851+
# The longer fs.* prefix would win a blind longest-match; the filter drops it.
2852+
credentials = [
2853+
StorageCredential(prefix="s3://warehouse/jindo", config={"fs.s3.access-key": "hadoop-k"}),
2854+
StorageCredential(prefix="s3://warehouse", config={"s3.access-key-id": "native-k"}),
2855+
]
2856+
result = RestCatalog._resolve_storage_credentials(credentials, "s3://warehouse/jindo/table/data")
2857+
assert result == {"s3.access-key-id": "native-k"}
2858+
2859+
2860+
def test_resolve_storage_credentials_mixed_prefix_namespaces_preserved() -> None:
2861+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2862+
2863+
credentials = [
2864+
StorageCredential(prefix="gs", config={"gs.oauth2.token": "tok"}),
2865+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2866+
]
2867+
result = RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path")
2868+
assert result == {"gs.oauth2.token": "tok"}
2869+
2870+
2871+
def test_resolve_storage_credentials_all_hadoop_only_returns_empty() -> None:
2872+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2873+
2874+
credentials = [
2875+
StorageCredential(prefix="custom", config={"fs.custom.access-key": "hadoop-k"}),
2876+
]
2877+
assert RestCatalog._resolve_storage_credentials(credentials, "custom://bucket/path") == {}
2878+
2879+
2880+
def test_resolve_storage_credentials_root_prefix_fallback_for_s3_compatible_scheme() -> None:
2881+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2882+
2883+
# oss:// is routed through pyarrow's S3FileSystem, so ROOT_PREFIX "s3" applies.
2884+
credentials = [
2885+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2886+
]
2887+
result = RestCatalog._resolve_storage_credentials(credentials, "oss://bucket/path")
2888+
assert result == {"s3.access-key-id": "native-k"}
2889+
2890+
2891+
def test_resolve_storage_credentials_root_prefix_fallback_respects_consumable() -> None:
2892+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2893+
2894+
credentials = [
2895+
StorageCredential(prefix="s3", config={"fs.s3.access-key": "hadoop-k"}),
2896+
]
2897+
assert RestCatalog._resolve_storage_credentials(credentials, "s3://bucket/path") == {}
2898+
2899+
2900+
def test_resolve_storage_credentials_fallback_skipped_for_non_s3_scheme() -> None:
2901+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2902+
2903+
credentials = [
2904+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2905+
]
2906+
assert RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path") == {}
2907+
2908+
28482909
def test_load_table_with_storage_credentials(rest_mock: Mocker, example_table_metadata_with_snapshot_v1: dict[str, Any]) -> None:
28492910
metadata_location = "s3://warehouse/database/table/metadata/00001.metadata.json"
28502911
rest_mock.get(

0 commit comments

Comments
 (0)