From 0d1de4e29d35df04b9949cfe860e9aae3b6e2f0b Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 14 Aug 2025 13:12:58 -0700 Subject: [PATCH 1/3] support adls.token --- pyiceberg/io/__init__.py | 1 + pyiceberg/io/fsspec.py | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index f89de18f12..9e4bf104c2 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -86,6 +86,7 @@ ADLS_DFS_STORAGE_AUTHORITY = "adls.dfs-storage-authority" ADLS_BLOB_STORAGE_SCHEME = "adls.blob-storage-scheme" ADLS_DFS_STORAGE_SCHEME = "adls.dfs-storage-scheme" +ADLS_TOKEN = "adls.token" GCS_TOKEN = "gcs.oauth2.token" GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at" GCS_PROJECT_ID = "gcs.project-id" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index d075765ed1..d37890d58b 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -48,6 +48,7 @@ ADLS_CREDENTIAL, ADLS_SAS_TOKEN, ADLS_TENANT_ID, + ADLS_TOKEN, AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, @@ -192,7 +193,11 @@ def _gs(properties: Properties) -> AbstractFileSystem: def _adls(properties: Properties) -> AbstractFileSystem: + import time + from adlfs import AzureBlobFileSystem + from azure.core.credentials import AccessToken + from azure.core.credentials_async import AsyncTokenCredential for key, sas_token in { key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in properties.items() if key.startswith(ADLS_SAS_TOKEN) @@ -202,9 +207,29 @@ def _adls(properties: Properties) -> AbstractFileSystem: if ADLS_SAS_TOKEN not in properties: properties[ADLS_SAS_TOKEN] = sas_token + class StaticTokenCredential(AsyncTokenCredential): + def __init__(self, token_string: str) -> None: + self._token = token_string + # If no expiry provided, set 1 hour from now + self._expires_on = int(time.time()) + 3600 + + async def get_token(self, *scopes: str, **kwargs: Any) -> AccessToken: + return AccessToken(self._token, self._expires_on) + + if ADLS_TOKEN in properties: + token = properties.get(ADLS_TOKEN) + if token is not None: + credential = StaticTokenCredential(token) + else: + credential = None + elif ADLS_CREDENTIAL in properties: + credential = properties.get(ADLS_CREDENTIAL) + else: + credential = None + return AzureBlobFileSystem( connection_string=properties.get(ADLS_CONNECTION_STRING), - credential=properties.get(ADLS_CREDENTIAL), + credential=credential, account_name=properties.get(ADLS_ACCOUNT_NAME), account_key=properties.get(ADLS_ACCOUNT_KEY), sas_token=properties.get(ADLS_SAS_TOKEN), From 9d772d5df185064ed824e37e6a38623bf84de57d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 14 Aug 2025 13:21:04 -0700 Subject: [PATCH 2/3] thanks gemini --- pyiceberg/io/fsspec.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index d37890d58b..9f10caac49 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -208,24 +208,20 @@ def _adls(properties: Properties) -> AbstractFileSystem: properties[ADLS_SAS_TOKEN] = sas_token class StaticTokenCredential(AsyncTokenCredential): + _DEFAULT_EXPIRY_SECONDS = 3600 + def __init__(self, token_string: str) -> None: self._token = token_string - # If no expiry provided, set 1 hour from now - self._expires_on = int(time.time()) + 3600 + # Set expiration 1 hour from now + self._expires_on = int(time.time()) + self._DEFAULT_EXPIRY_SECONDS async def get_token(self, *scopes: str, **kwargs: Any) -> AccessToken: return AccessToken(self._token, self._expires_on) - if ADLS_TOKEN in properties: - token = properties.get(ADLS_TOKEN) - if token is not None: - credential = StaticTokenCredential(token) - else: - credential = None - elif ADLS_CREDENTIAL in properties: - credential = properties.get(ADLS_CREDENTIAL) + if token := properties.get(ADLS_TOKEN): + credential = StaticTokenCredential(token) else: - credential = None + credential = properties.get(ADLS_CREDENTIAL) # type: ignore return AzureBlobFileSystem( connection_string=properties.get(ADLS_CONNECTION_STRING), From 8bd353077a75da5e90355152e09e78bf7c8a3917 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 14 Aug 2025 13:23:33 -0700 Subject: [PATCH 3/3] thx gemini --- pyiceberg/io/fsspec.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 9f10caac49..fafeee307e 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -212,11 +212,11 @@ class StaticTokenCredential(AsyncTokenCredential): def __init__(self, token_string: str) -> None: self._token = token_string - # Set expiration 1 hour from now - self._expires_on = int(time.time()) + self._DEFAULT_EXPIRY_SECONDS async def get_token(self, *scopes: str, **kwargs: Any) -> AccessToken: - return AccessToken(self._token, self._expires_on) + # Set expiration 1 hour from now + expires_on = int(time.time()) + self._DEFAULT_EXPIRY_SECONDS + return AccessToken(self._token, expires_on) if token := properties.get(ADLS_TOKEN): credential = StaticTokenCredential(token)