Skip to content

Commit b593375

Browse files
authored
Deprecate ADLFS prefix in favor of ADLS (#961)
* Deprecate ADLFS prefix in favor of ADLS
* Add missing renaming
1 parent ae2825b commit b593375

File tree

8 files changed

+151
-98
lines changed

8 files changed

+151
-98
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ test-integration-rebuild:
5959
docker compose -f dev/docker-compose-integration.yml rm -f
6060
docker compose -f dev/docker-compose-integration.yml build --no-cache
6161

62-
test-adlfs: ## Run tests marked with adlfs, can add arguments with PYTEST_ARGS="-vv"
62+
test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv"
6363
sh ./dev/run-azurite.sh
64-
poetry run pytest tests/ -m adlfs ${PYTEST_ARGS}
64+
poetry run pytest tests/ -m adls ${PYTEST_ARGS}
6565

6666
test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv"
6767
sh ./dev/run-gcs-server.sh

mkdocs/docs/configuration.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,15 @@ For the FileIO there are several configuration options available:
109109

110110
<!-- markdown-link-check-disable -->
111111

112-
| Key | Example | Description |
113-
| ----------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
114-
| adlfs.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=<http://localhost/> | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
115-
| adlfs.account-name | devstoreaccount1 | The account that you want to connect to |
116-
| adlfs.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. |
117-
| adlfs.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
118-
| adlfs.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
119-
| adlfs.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
120-
| adlfs.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |
112+
| Key | Example | Description |
113+
| ---------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
114+
| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=<http://localhost/> | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adls-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
115+
| adls.account-name | devstoreaccount1 | The account that you want to connect to |
116+
| adls.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. |
117+
| adls.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
118+
| adls.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
119+
| adls.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
120+
| adls.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |
121121

122122
<!-- markdown-link-check-enable-->
123123

mkdocs/docs/contributing.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,17 @@ For Python, `pytest` is used a testing framework in combination with `coverage`
106106
make test
107107
```
108108

109-
By default, S3 and ADLFS tests are ignored because that require minio and azurite to be running.
109+
By default, S3 and ADLS tests are ignored because that require minio and azurite to be running.
110110
To run the S3 suite:
111111

112112
```bash
113113
make test-s3
114114
```
115115

116-
To run the ADLFS suite:
116+
To run the ADLS suite:
117117

118118
```bash
119-
make test-adlfs
119+
make test-adls
120120
```
121121

122122
To pass additional arguments to pytest, you can use `PYTEST_ARGS`.

pyiceberg/io/__init__.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@
4848

4949
logger = logging.getLogger(__name__)
5050

51+
ADLFS_CONNECTION_STRING = "adlfs.connection-string"
52+
ADLFS_ACCOUNT_NAME = "adlfs.account-name"
53+
ADLFS_ACCOUNT_KEY = "adlfs.account-key"
54+
ADLFS_SAS_TOKEN = "adlfs.sas-token"
55+
ADLFS_TENANT_ID = "adlfs.tenant-id"
56+
ADLFS_CLIENT_ID = "adlfs.client-id"
57+
ADLFS_ClIENT_SECRET = "adlfs.client-secret"
58+
ADLFS_PREFIX = "adlfs"
5159
AWS_REGION = "client.region"
5260
AWS_ACCESS_KEY_ID = "client.access-key-id"
5361
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
@@ -66,13 +74,13 @@
6674
HDFS_PORT = "hdfs.port"
6775
HDFS_USER = "hdfs.user"
6876
HDFS_KERB_TICKET = "hdfs.kerberos_ticket"
69-
ADLFS_CONNECTION_STRING = "adlfs.connection-string"
70-
ADLFS_ACCOUNT_NAME = "adlfs.account-name"
71-
ADLFS_ACCOUNT_KEY = "adlfs.account-key"
72-
ADLFS_SAS_TOKEN = "adlfs.sas-token"
73-
ADLFS_TENANT_ID = "adlfs.tenant-id"
74-
ADLFS_CLIENT_ID = "adlfs.client-id"
75-
ADLFS_ClIENT_SECRET = "adlfs.client-secret"
77+
ADLS_CONNECTION_STRING = "adls.connection-string"
78+
ADLS_ACCOUNT_NAME = "adls.account-name"
79+
ADLS_ACCOUNT_KEY = "adls.account-key"
80+
ADLS_SAS_TOKEN = "adls.sas-token"
81+
ADLS_TENANT_ID = "adls.tenant-id"
82+
ADLS_CLIENT_ID = "adls.client-id"
83+
ADLS_ClIENT_SECRET = "adls.client-secret"
7684
GCS_TOKEN = "gcs.oauth2.token"
7785
GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at"
7886
GCS_PROJECT_ID = "gcs.project-id"

pyiceberg/io/fsspec.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,15 @@
4444
ADLFS_ACCOUNT_NAME,
4545
ADLFS_CLIENT_ID,
4646
ADLFS_CONNECTION_STRING,
47+
ADLFS_PREFIX,
4748
ADLFS_SAS_TOKEN,
4849
ADLFS_TENANT_ID,
50+
ADLS_ACCOUNT_KEY,
51+
ADLS_ACCOUNT_NAME,
52+
ADLS_CLIENT_ID,
53+
ADLS_CONNECTION_STRING,
54+
ADLS_SAS_TOKEN,
55+
ADLS_TENANT_ID,
4956
AWS_ACCESS_KEY_ID,
5057
AWS_REGION,
5158
AWS_SECRET_ACCESS_KEY,
@@ -71,13 +78,15 @@
7178
S3_SIGNER_ENDPOINT_DEFAULT,
7279
S3_SIGNER_URI,
7380
ADLFS_ClIENT_SECRET,
81+
ADLS_ClIENT_SECRET,
7482
FileIO,
7583
InputFile,
7684
InputStream,
7785
OutputFile,
7886
OutputStream,
7987
)
8088
from pyiceberg.typedef import Properties
89+
from pyiceberg.utils.deprecated import deprecation_message
8190
from pyiceberg.utils.properties import get_first_property_value, property_as_bool
8291

8392
logger = logging.getLogger(__name__)
@@ -176,17 +185,53 @@ def _gs(properties: Properties) -> AbstractFileSystem:
176185
)
177186

178187

179-
def _adlfs(properties: Properties) -> AbstractFileSystem:
188+
def _adls(properties: Properties) -> AbstractFileSystem:
180189
from adlfs import AzureBlobFileSystem
181190

191+
for property_name in properties:
192+
if property_name.startswith(ADLFS_PREFIX):
193+
deprecation_message(
194+
deprecated_in="0.8.0",
195+
removed_in="0.9.0",
196+
help_message=f"The property {property_name} is deprecated. Please use properties that start with adls.",
197+
)
198+
182199
return AzureBlobFileSystem(
183-
connection_string=properties.get(ADLFS_CONNECTION_STRING),
184-
account_name=properties.get(ADLFS_ACCOUNT_NAME),
185-
account_key=properties.get(ADLFS_ACCOUNT_KEY),
186-
sas_token=properties.get(ADLFS_SAS_TOKEN),
187-
tenant_id=properties.get(ADLFS_TENANT_ID),
188-
client_id=properties.get(ADLFS_CLIENT_ID),
189-
client_secret=properties.get(ADLFS_ClIENT_SECRET),
200+
connection_string=get_first_property_value(
201+
properties,
202+
ADLS_CONNECTION_STRING,
203+
ADLFS_CONNECTION_STRING,
204+
),
205+
account_name=get_first_property_value(
206+
properties,
207+
ADLS_ACCOUNT_NAME,
208+
ADLFS_ACCOUNT_NAME,
209+
),
210+
account_key=get_first_property_value(
211+
properties,
212+
ADLS_ACCOUNT_KEY,
213+
ADLFS_ACCOUNT_KEY,
214+
),
215+
sas_token=get_first_property_value(
216+
properties,
217+
ADLS_SAS_TOKEN,
218+
ADLFS_SAS_TOKEN,
219+
),
220+
tenant_id=get_first_property_value(
221+
properties,
222+
ADLS_TENANT_ID,
223+
ADLFS_TENANT_ID,
224+
),
225+
client_id=get_first_property_value(
226+
properties,
227+
ADLS_CLIENT_ID,
228+
ADLFS_CLIENT_ID,
229+
),
230+
client_secret=get_first_property_value(
231+
properties,
232+
ADLS_ClIENT_SECRET,
233+
ADLFS_ClIENT_SECRET,
234+
),
190235
)
191236

192237

@@ -196,8 +241,8 @@ def _adlfs(properties: Properties) -> AbstractFileSystem:
196241
"s3": _s3,
197242
"s3a": _s3,
198243
"s3n": _s3,
199-
"abfs": _adlfs,
200-
"abfss": _adlfs,
244+
"abfs": _adls,
245+
"abfss": _adls,
201246
"gs": _gs,
202247
"gcs": _gs,
203248
}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ gcsfs = ["gcsfs"]
603603
markers = [
604604
"unmarked: marks a test as a unittest",
605605
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
606-
"adlfs: marks a test as requiring access to adlfs compliant storage (use with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)",
606+
"adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)",
607607
"integration: marks integration tests against Apache Spark",
608608
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
609609
]

tests/conftest.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,23 +111,23 @@ def pytest_addoption(parser: pytest.Parser) -> None:
111111
parser.addoption(
112112
"--s3.secret-access-key", action="store", default="password", help="The AWS secret access key ID for tests marked as s3"
113113
)
114-
# ADLFS options
114+
# ADLS options
115115
# Azurite provides default account name and key. Those can be customized using env variables.
116116
# For more information, see README file at https://github.com/azure/azurite#default-storage-account
117117
parser.addoption(
118-
"--adlfs.endpoint",
118+
"--adls.endpoint",
119119
action="store",
120120
default="http://127.0.0.1:10000",
121-
help="The ADLS endpoint URL for tests marked as adlfs",
121+
help="The ADLS endpoint URL for tests marked as adls",
122122
)
123123
parser.addoption(
124-
"--adlfs.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adlfs"
124+
"--adls.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adls"
125125
)
126126
parser.addoption(
127-
"--adlfs.account-key",
127+
"--adls.account-key",
128128
action="store",
129129
default="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
130-
help="The ADLS secret account key for tests marked as adlfs",
130+
help="The ADLS secret account key for tests marked as adls",
131131
)
132132
parser.addoption(
133133
"--gcs.endpoint", action="store", default="http://0.0.0.0:4443", help="The GCS endpoint URL for tests marked gcs"
@@ -1955,16 +1955,16 @@ def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, No
19551955

19561956

19571957
@pytest.fixture
1958-
def adlfs_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]:
1958+
def adls_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]:
19591959
from azure.storage.blob import BlobServiceClient
19601960

1961-
azurite_url = request.config.getoption("--adlfs.endpoint")
1962-
azurite_account_name = request.config.getoption("--adlfs.account-name")
1963-
azurite_account_key = request.config.getoption("--adlfs.account-key")
1961+
azurite_url = request.config.getoption("--adls.endpoint")
1962+
azurite_account_name = request.config.getoption("--adls.account-name")
1963+
azurite_account_key = request.config.getoption("--adls.account-key")
19641964
azurite_connection_string = f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
19651965
properties = {
1966-
"adlfs.connection-string": azurite_connection_string,
1967-
"adlfs.account-name": azurite_account_name,
1966+
"adls.connection-string": azurite_connection_string,
1967+
"adls.account-name": azurite_account_name,
19681968
}
19691969

19701970
bbs = BlobServiceClient.from_connection_string(conn_str=azurite_connection_string)

0 commit comments

Comments (0)