Skip to content

Commit 8052652

Browse files
authored
Add missing Arrow AzureFileSystem parameters (#2301)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual GitHub issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Similar to #2299, this PR adds support for the remaining [`pyarrow.fs.AzureFileSystem`](https://arrow.apache.org/docs/python/generated/pyarrow.fs.AzureFileSystem.html) parameters: `client_id`, `client_secret`, and `tenant_id`. Note that the [Azure Data Lake configuration page](https://github.com/apache/iceberg-python/blob/main/mkdocs/docs/configuration.md#azure-data-lake) already documents these 3 parameters. # Are these changes tested? # Are there any user-facing changes? <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent 0d463b9 commit 8052652

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,12 @@
8787
ADLS_ACCOUNT_NAME,
8888
ADLS_BLOB_STORAGE_AUTHORITY,
8989
ADLS_BLOB_STORAGE_SCHEME,
90+
ADLS_CLIENT_ID,
91+
ADLS_CLIENT_SECRET,
9092
ADLS_DFS_STORAGE_AUTHORITY,
9193
ADLS_DFS_STORAGE_SCHEME,
9294
ADLS_SAS_TOKEN,
95+
ADLS_TENANT_ID,
9396
AWS_ACCESS_KEY_ID,
9497
AWS_REGION,
9598
AWS_ROLE_ARN,
@@ -501,6 +504,7 @@ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
501504
return S3FileSystem(**client_kwargs)
502505

503506
def _initialize_azure_fs(self) -> FileSystem:
507+
# https://arrow.apache.org/docs/python/generated/pyarrow.fs.AzureFileSystem.html
504508
from packaging import version
505509

506510
MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS = "20.0.0"
@@ -535,6 +539,24 @@ def _initialize_azure_fs(self) -> FileSystem:
535539
if sas_token := self.properties.get(ADLS_SAS_TOKEN):
536540
client_kwargs["sas_token"] = sas_token
537541

542+
if client_id := self.properties.get(ADLS_CLIENT_ID):
543+
client_kwargs["client_id"] = client_id
544+
if client_secret := self.properties.get(ADLS_CLIENT_SECRET):
545+
client_kwargs["client_secret"] = client_secret
546+
if tenant_id := self.properties.get(ADLS_TENANT_ID):
547+
client_kwargs["tenant_id"] = tenant_id
548+
549+
# Validate that all three are provided together for ClientSecretCredential
550+
credential_keys = ["client_id", "client_secret", "tenant_id"]
551+
provided_keys = [key for key in credential_keys if key in client_kwargs]
552+
if provided_keys and len(provided_keys) != len(credential_keys):
553+
missing_keys = [key for key in credential_keys if key not in client_kwargs]
554+
raise ValueError(
555+
f"client_id, client_secret, and tenant_id must all be provided together "
556+
f"to use ClientSecretCredential for Azure authentication. "
557+
f"Provided: {provided_keys}, Missing: {missing_keys}"
558+
)
559+
538560
return AzureFileSystem(**client_kwargs)
539561

540562
def _initialize_hdfs_fs(self, scheme: str, netloc: Optional[str]) -> FileSystem:

0 commit comments

Comments
 (0)