diff --git a/Makefile b/Makefile index 859d8dfa23..2ab9ce6344 100644 --- a/Makefile +++ b/Makefile @@ -103,6 +103,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache +test-integration-trino: ## Run tests marked with @pytest.mark.integration_trino + sh ./dev/run-trino.sh + $(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS) + test-s3: ## Run tests marked with @pytest.mark.s3 sh ./dev/run-minio.sh $(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS) @@ -116,7 +120,7 @@ test-gcs: ## Run tests marked with @pytest.mark.gcs $(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS) test-coverage: COVERAGE=1 -test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report +test-coverage: test test-integration test-integration-trino test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report coverage-report: ## Combine and report coverage poetry run coverage combine diff --git a/dev/docker-compose-integration.yml b/dev/docker-compose-integration.yml index c901b2ee23..07e9cc9984 100644 --- a/dev/docker-compose-integration.yml +++ b/dev/docker-compose-integration.yml @@ -54,6 +54,21 @@ services: - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO - CATALOG_S3_ENDPOINT=http://minio:9000 - CATALOG_JDBC_STRICT__MODE=true + + trino: + image: trinodb/trino:476 + container_name: pyiceberg-trino + networks: + iceberg_net: + ports: + - 8082:8080 + environment: + - CATALOG_MANAGEMENT=dynamic + depends_on: + - rest + - hive + volumes: + - ./trino/catalog:/etc/trino/catalog minio: image: minio/minio container_name: pyiceberg-minio diff --git a/dev/docker-compose-trino.yml b/dev/docker-compose-trino.yml new file mode 100644 index 0000000000..24a3ecf974 --- /dev/null +++ b/dev/docker-compose-trino.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +services: + rest: + image: apache/iceberg-rest-fixture + container_name: pyiceberg-rest + networks: + iceberg_net: + ports: + - 8181:8181 + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + - CATALOG_WAREHOUSE=s3://warehouse/ + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio:9000 + + trino: + image: trinodb/trino:476 + container_name: pyiceberg-trino + networks: + iceberg_net: + ports: + - 8082:8080 + environment: + - CATALOG_MANAGEMENT=dynamic + depends_on: + - rest + - hive + volumes: + - ./trino/catalog:/etc/trino/catalog + - ./trino/config.properties:/etc/trino/config.properties + + minio: + image: minio/minio + container_name: pyiceberg-minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + networks: + iceberg_net: + aliases: + - warehouse.minio + ports: + - 9001:9001 + - 9000:9000 + command: ["server", "/data", "--console-address", ":9001"] + mc: + depends_on: + - minio + image: minio/mc + container_name: pyiceberg-mc + networks: + iceberg_net: + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc mb minio/warehouse; + /usr/bin/mc policy set public minio/warehouse; + tail -f /dev/null + " + + hive: + build: hive/ + container_name: hive + hostname: hive + networks: + iceberg_net: + ports: + - 9083:9083 + environment: + SERVICE_NAME: "metastore" + SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/" + +networks: + iceberg_net: diff --git a/dev/run-trino.sh b/dev/run-trino.sh new file mode 100644 index 0000000000..e212067293 --- /dev/null +++ b/dev/run-trino.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex + +if [ $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ]; then + echo "Trino service running" +else + docker compose -f dev/docker-compose-trino.yml kill + docker compose -f dev/docker-compose-trino.yml up -d + while [ -z $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ] + do + echo "Waiting for Trino" + sleep 1 + done +fi diff --git a/dev/trino/catalog/warehouse_hive.properties b/dev/trino/catalog/warehouse_hive.properties new file mode 100644 index 0000000000..54b69d966b --- /dev/null +++ b/dev/trino/catalog/warehouse_hive.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +connector.name=iceberg +iceberg.catalog.type=hive_metastore +iceberg.expire-snapshots.min-retention=0d +iceberg.remove-orphan-files.min-retention=0d +iceberg.register-table-procedure.enabled=true +hive.metastore.uri=thrift://hive:9083 +iceberg.hive-catalog-name=hive +fs.native-s3.enabled=true +s3.region=us-east-1 +s3.aws-access-key=admin +s3.aws-secret-key=password +s3.endpoint=http://minio:9000 +s3.path-style-access=false diff --git a/dev/trino/catalog/warehouse_rest.properties b/dev/trino/catalog/warehouse_rest.properties new file mode 100644 index 0000000000..c80d9cff04 --- /dev/null +++ b/dev/trino/catalog/warehouse_rest.properties @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +connector.name=iceberg +iceberg.catalog.type=rest +iceberg.rest-catalog.uri=http://rest:8181 +iceberg.rest-catalog.warehouse=s3://warehouse/default +iceberg.rest-catalog.nested-namespace-enabled=true +iceberg.rest-catalog.case-insensitive-name-matching=true +iceberg.expire-snapshots.min-retention=0d +iceberg.remove-orphan-files.min-retention=0d +iceberg.register-table-procedure.enabled=true +fs.native-s3.enabled=true +s3.region=us-east-1 +s3.aws-access-key=admin +s3.aws-secret-key=password +s3.endpoint=http://minio:9000 +s3.path-style-access=false diff --git a/dev/trino/config.properties b/dev/trino/config.properties new file mode 100644 index 0000000000..733e937032 --- /dev/null +++ b/dev/trino/config.properties @@ -0,0 +1,8 @@ +#single node install config +coordinator=true +node-scheduler.include-coordinator=true +http-server.http.port=8080 +discovery.uri=http://localhost:8080 +http-server.process-forwarded=true +http-server.https.enabled=false +catalog.management=${ENV:CATALOG_MANAGEMENT} \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 9360f9e79c..3368da71f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -59,7 +59,7 @@ description = "Happy Eyeballs for asyncio" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -72,7 +72,7 @@ description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:906d5075b5ba0dd1c66fcaaf60eb09926a9fef3ca92d912d2a0bbdbecf8b1248"}, {file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c875bf6fc2fd1a572aba0e02ef4e7a63694778c5646cdbda346ee24e630d30fb"}, @@ -202,7 +202,7 @@ description = "aiosignal: a list of registered asynchronous callbacks" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")" +markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")" files = [ {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, @@ -269,7 +269,7 @@ description = "Timeout context manager for asyncio programs" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version <= \"3.10\"" +markers = "(extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version <= \"3.10\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -286,7 +286,7 @@ files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] -markers = {main = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")"} +markers = {main = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")"} [package.extras] benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] @@ -375,15 +375,15 @@ requests = ">=2.20.0" [[package]] name = "azure-identity" -version = "1.23.0" +version = "1.23.1" description = "Microsoft Azure Identity Library for Python" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "azure_identity-1.23.0-py3-none-any.whl", hash = "sha256:dbbeb64b8e5eaa81c44c565f264b519ff2de7ff0e02271c49f3cb492762a50b0"}, - {file = "azure_identity-1.23.0.tar.gz", hash = "sha256:d9cdcad39adb49d4bb2953a217f62aec1f65bbb3c63c9076da2be2a47e53dde4"}, + {file = "azure_identity-1.23.1-py3-none-any.whl", hash = "sha256:7eed28baa0097a47e3fb53bd35a63b769e6b085bb3cb616dfce2b67f28a004a1"}, + {file = "azure_identity-1.23.1.tar.gz", hash = "sha256:226c1ef982a9f8d5dcf6e0f9ed35eaef2a4d971e7dd86317e9b9d52e70a035e4"}, ] [package.dependencies] @@ -395,15 +395,15 @@ typing-extensions = ">=4.0.0" [[package]] name = "azure-storage-blob" -version = "12.25.1" +version = "12.26.0" description = "Microsoft Azure Blob Storage Client Library for Python" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "azure_storage_blob-12.25.1-py3-none-any.whl", hash = "sha256:1f337aab12e918ec3f1b638baada97550673911c4ceed892acc8e4e891b74167"}, - {file = "azure_storage_blob-12.25.1.tar.gz", hash = "sha256:4f294ddc9bc47909ac66b8934bd26b50d2000278b10ad82cc109764fdc6e0e3b"}, + {file = "azure_storage_blob-12.26.0-py3-none-any.whl", hash = "sha256:8c5631b8b22b4f53ec5fff2f3bededf34cfef111e2af613ad42c9e6de00a77fe"}, + {file = "azure_storage_blob-12.26.0.tar.gz", hash = "sha256:5dd7d7824224f7de00bfeb032753601c982655173061e242f13be6e26d78d71f"}, ] [package.dependencies] @@ -545,7 +545,7 @@ files = [ {file = "boto3-1.38.46-py3-none-any.whl", hash = "sha256:9c8e88a32a6465e5905308708cff5b17547117f06982908bdfdb0108b4a65079"}, {file = "boto3-1.38.46.tar.gz", hash = "sha256:d1ca2b53138afd0341e1962bd52be6071ab7a63c5b4f89228c5ef8942c40c852"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.38.46,<1.39.0" @@ -566,7 +566,7 @@ files = [ {file = "botocore-1.38.46-py3-none-any.whl", hash = "sha256:89ca782ffbf2e8769ca9c89234cfa5ca577f1987d07d913ee3c68c4776b1eb5b"}, {file = "botocore-1.38.46.tar.gz", hash = "sha256:8798e5a418c27cf93195b077153644aea44cb171fcd56edc1ecebaa1e49e226e"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [package.dependencies] jmespath = ">=0.7.1,<2.0.0" @@ -706,7 +706,7 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] -markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\"", dev = "platform_python_implementation != \"PyPy\""} +markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\""} [package.dependencies] pycparser = "*" @@ -859,6 +859,7 @@ description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" groups = ["main", "dev", "docs"] +markers = "python_version < \"3.10\"" files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -867,6 +868,22 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click" +version = "8.2.1" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.10" +groups = ["main", "dev", "docs"] +markers = "python_version >= \"3.10\"" +files = [ + {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, + {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "cloudpickle" version = "3.1.1" @@ -1228,19 +1245,19 @@ files = [ [[package]] name = "daft" -version = "0.5.11" +version = "0.5.12" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"daft\"" files = [ - {file = "daft-0.5.11-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:661978a311c9a75c071c1be5d6576e47b1d5a698d4472851d10f82f451970780"}, - {file = "daft-0.5.11-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa21103bdeffc7a4810d79ed21822fa279409e53f6eb4d9dd711fb44aa162c30"}, - {file = "daft-0.5.11-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:aa01effca4e6d23df2e767bee126c853a6e6e2dc5c27357d081b1973423e4f87"}, - {file = "daft-0.5.11-cp39-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:640cbd53bfba15184d942206d1a6a0b3baf00c9c98c689dbe432e3728b125b94"}, - {file = "daft-0.5.11-cp39-abi3-win_amd64.whl", hash = "sha256:771ebd208247600391459e5b45b8895d64071550aef407f5185cf9a81ea70d67"}, - {file = "daft-0.5.11.tar.gz", hash = "sha256:b2495828b27e1658505b1c088eeda7eaa02aae51715528c738fe4b2e367eed28"}, + {file = "daft-0.5.12-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dd1c745612ca8e346a9d150ea65866e0b5ff896e5d74baa7e1f9a6dd3f55bab0"}, + {file = "daft-0.5.12-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ca0a1ffb9fdd4ecabab5bd433856a6bca85d92df2a3aae7fe3b6c3d6327fd24b"}, + {file = "daft-0.5.12-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:977a16b0ced05980ecda27d73572f874de466d5eb9e32509ad84b9e4b508dbe5"}, + {file = "daft-0.5.12-cp39-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:b998a4f7ef2be7956aabbccc72bd66bfb343528a8aa856a82045bd3692aa9ec5"}, + {file = "daft-0.5.12-cp39-abi3-win_amd64.whl", hash = "sha256:a656897d6ad76134148b15185b4b94fbad769eb977a60d83fa27bcacbd8549e5"}, + {file = "daft-0.5.12.tar.gz", hash = "sha256:5eba32a6b25a21cb53357394e22e3d2cf959a78897a88716bfc5ed375abd113e"}, ] [package.dependencies] @@ -1332,14 +1349,14 @@ tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} [[package]] name = "distlib" -version = "0.3.9" +version = "0.4.0" description = "Distribution utilities" optional = false python-versions = "*" groups = ["dev"] files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] [[package]] @@ -1581,7 +1598,7 @@ description = "A list-like structure which implements collections.abc.MutableSeq optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")" +markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")" files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1691,14 +1708,14 @@ files = [ [[package]] name = "fsspec" -version = "2025.5.1" +version = "2025.7.0" description = "File-system specification" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462"}, - {file = "fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475"}, + {file = "fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21"}, + {file = "fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58"}, ] [package.extras] @@ -1706,7 +1723,7 @@ abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] -dev = ["pre-commit", "ruff"] +dev = ["pre-commit", "ruff (>=0.5)"] doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] @@ -1726,26 +1743,26 @@ smb = ["smbprotocol"] ssh = ["paramiko"] test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] -test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""] tqdm = ["tqdm"] [[package]] name = "gcsfs" -version = "2025.5.1" +version = "2025.7.0" description = "Convenient Filesystem interface over GCS" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"gcsfs\"" files = [ - {file = "gcsfs-2025.5.1-py2.py3-none-any.whl", hash = "sha256:48712471ff71ac83d3e2152ba4dc232874698466e344d5e700feba06b0a0de7b"}, - {file = "gcsfs-2025.5.1.tar.gz", hash = "sha256:ba945530cf4857cd9d599ccb3ae729c65c39088880b11c4df1fecac30df5f3e3"}, + {file = "gcsfs-2025.7.0-py2.py3-none-any.whl", hash = "sha256:653503331d58cb02bb34e725d4595d166e93f7f2f3ff88e4c66ef535ae66eae5"}, + {file = "gcsfs-2025.7.0.tar.gz", hash = "sha256:ad3ff66cf189ae8fc375ac8a2af409003dbca02357621cb94a66e457e02ba420"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2025.5.1" +fsspec = "2025.7.0" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -2001,10 +2018,9 @@ typing-extensions = {version = ">=4,<5", markers = "python_version < \"3.10\""} name = "greenlet" version = "3.2.3" description = "Lightweight in-process concurrent programming" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\")" +groups = ["main", "dev"] files = [ {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, @@ -2061,6 +2077,7 @@ files = [ {file = "greenlet-3.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:aaa7aae1e7f75eaa3ae400ad98f8644bb81e1dc6ba47ce8a93d3f17274e08322"}, {file = "greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, ] +markers = {main = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\")", dev = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] docs = ["Sphinx", "furo"] @@ -2330,7 +2347,7 @@ files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [[package]] name = "joserfc" @@ -2395,14 +2412,14 @@ files = [ [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, - {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, + {file = "jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716"}, + {file = "jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f"}, ] markers = {main = "extra == \"ray\""} @@ -2414,7 +2431,7 @@ rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] [[package]] name = "jsonschema-path" @@ -2553,6 +2570,62 @@ files = [ {file = "llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4"}, ] +[[package]] +name = "lz4" +version = "4.4.4" +description = "LZ4 Bindings for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "lz4-4.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f170abb8416c4efca48e76cac2c86c3185efdf841aecbe5c190121c42828ced0"}, + {file = "lz4-4.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d33a5105cd96ebd32c3e78d7ece6123a9d2fb7c18b84dec61f27837d9e0c496c"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ebbc5b76b4f0018988825a7e9ce153be4f0d4eba34e6c1f2fcded120573e88"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc64d6dfa7a89397529b22638939e70d85eaedc1bd68e30a29c78bfb65d4f715"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a355223a284f42a723c120ce68827de66d5cb872a38732b3d5abbf544fa2fe26"}, + {file = "lz4-4.4.4-cp310-cp310-win32.whl", hash = "sha256:b28228197775b7b5096898851d59ef43ccaf151136f81d9c436bc9ba560bc2ba"}, + {file = "lz4-4.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:45e7c954546de4f85d895aa735989d77f87dd649f503ce1c8a71a151b092ed36"}, + {file = "lz4-4.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:e3fc90f766401684740978cd781d73b9685bd81b5dbf7257542ef9de4612e4d2"}, + {file = "lz4-4.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ddfc7194cd206496c445e9e5b0c47f970ce982c725c87bd22de028884125b68f"}, + {file = "lz4-4.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:714f9298c86f8e7278f1c6af23e509044782fa8220eb0260f8f8f1632f820550"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8474c91de47733856c6686df3c4aca33753741da7e757979369c2c0d32918ba"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80dd27d7d680ea02c261c226acf1d41de2fd77af4fb2da62b278a9376e380de0"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b7d6dddfd01b49aedb940fdcaf32f41dc58c926ba35f4e31866aeec2f32f4f4"}, + {file = "lz4-4.4.4-cp311-cp311-win32.whl", hash = "sha256:4134b9fd70ac41954c080b772816bb1afe0c8354ee993015a83430031d686a4c"}, + {file = "lz4-4.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:f5024d3ca2383470f7c4ef4d0ed8eabad0b22b23eeefde1c192cf1a38d5e9f78"}, + {file = "lz4-4.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:6ea715bb3357ea1665f77874cf8f55385ff112553db06f3742d3cdcec08633f7"}, + {file = "lz4-4.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:23ae267494fdd80f0d2a131beff890cf857f1b812ee72dbb96c3204aab725553"}, + {file = "lz4-4.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fff9f3a1ed63d45cb6514bfb8293005dc4141341ce3500abdfeb76124c0b9b2e"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea7f07329f85a8eda4d8cf937b87f27f0ac392c6400f18bea2c667c8b7f8ecc"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ccab8f7f7b82f9fa9fc3b0ba584d353bd5aa818d5821d77d5b9447faad2aaad"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43e9d48b2daf80e486213128b0763deed35bbb7a59b66d1681e205e1702d735"}, + {file = "lz4-4.4.4-cp312-cp312-win32.whl", hash = "sha256:33e01e18e4561b0381b2c33d58e77ceee850a5067f0ece945064cbaac2176962"}, + {file = "lz4-4.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d21d1a2892a2dcc193163dd13eaadabb2c1b803807a5117d8f8588b22eaf9f12"}, + {file = "lz4-4.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:2f4f2965c98ab254feddf6b5072854a6935adab7bc81412ec4fe238f07b85f62"}, + {file = "lz4-4.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed6eb9f8deaf25ee4f6fad9625d0955183fdc90c52b6f79a76b7f209af1b6e54"}, + {file = "lz4-4.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:18ae4fe3bafb344dbd09f976d45cbf49c05c34416f2462828f9572c1fa6d5af7"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57fd20c5fc1a49d1bbd170836fccf9a338847e73664f8e313dce6ac91b8c1e02"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9cb387c33f014dae4db8cb4ba789c8d2a0a6d045ddff6be13f6c8d9def1d2a6"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0be9f68240231e1e44118a4ebfecd8a5d4184f0bdf5c591c98dd6ade9720afd"}, + {file = "lz4-4.4.4-cp313-cp313-win32.whl", hash = "sha256:e9ec5d45ea43684f87c316542af061ef5febc6a6b322928f059ce1fb289c298a"}, + {file = "lz4-4.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:a760a175b46325b2bb33b1f2bbfb8aa21b48e1b9653e29c10b6834f9bb44ead4"}, + {file = "lz4-4.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f4c21648d81e0dda38b4720dccc9006ae33b0e9e7ffe88af6bf7d4ec124e2fba"}, + {file = "lz4-4.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd1add57b6fe1f96bed2d529de085e9378a3ac04b86f116d10506f85b68e97fc"}, + {file = "lz4-4.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:900912e8a7cf74b4a2bea18a3594ae0bf1138f99919c20017167b6e05f760aa4"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:017f8d269a739405a59d68a4d63d23a8df23e3bb2c70aa069b7563af08dfdffb"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac522788296a9a02a39f620970dea86c38e141e21e51238f1b5e9fa629f8e69"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b56aa9eef830bf6443acd8c4e18b208a8993dc32e0d6ef4263ecfa6afb3f599"}, + {file = "lz4-4.4.4-cp39-cp39-win32.whl", hash = "sha256:585b42eb37ab16a278c3a917ec23b2beef175aa669f4120142b97aebf90ef775"}, + {file = "lz4-4.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:4ab1537bd3b3bfbafd3c8847e06827129794488304f21945fc2f5b669649d94f"}, + {file = "lz4-4.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:38730927ad51beb42ab8dbc5555270bfbe86167ba734265f88bbd799fced1004"}, + {file = "lz4-4.4.4.tar.gz", hash = "sha256:070fd0627ec4393011251a094e08ed9fdcc78cb4e7ab28f507638eee4e39abda"}, +] + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx_bootstrap_theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "markdown" version = "3.8.2" @@ -3069,24 +3142,24 @@ tests = ["pytest (>=4.6)"] [[package]] name = "msal" -version = "1.32.3" +version = "1.33.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = true python-versions = ">=3.7" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "msal-1.32.3-py3-none-any.whl", hash = "sha256:b2798db57760b1961b142f027ffb7c8169536bf77316e99a0df5c4aaebb11569"}, - {file = "msal-1.32.3.tar.gz", hash = "sha256:5eea038689c78a5a70ca8ecbe1245458b55a857bd096efb6989c69ba15985d35"}, + {file = "msal-1.33.0-py3-none-any.whl", hash = "sha256:c0cd41cecf8eaed733ee7e3be9e040291eba53b0f262d3ae9c58f38b04244273"}, + {file = "msal-1.33.0.tar.gz", hash = "sha256:836ad80faa3e25a7d71015c990ce61f704a87328b1e73bcbb0623a18cbf17510"}, ] [package.dependencies] -cryptography = ">=2.5,<47" +cryptography = ">=2.5,<48" PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] +broker = ["pymsalruntime (>=0.14,<0.19) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.19) ; python_version >= \"3.8\" and platform_system == \"Darwin\"", "pymsalruntime (>=0.18,<0.19) ; python_version >= \"3.8\" and platform_system == \"Linux\""] [[package]] name = "msal-extensions" @@ -3184,7 +3257,7 @@ description = "multidict implementation" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2be5b7b35271f7fff1397204ba6708365e3d773579fe2a30625e16c4b4ce817"}, {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12f4581d2930840295c461764b9a65732ec01250b46c6b2c510d7ee68872b140"}, @@ -3501,7 +3574,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version < \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version < \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -3557,7 +3630,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version >= \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -3705,7 +3778,7 @@ description = "Powerful data structures for data analysis, time series, and stat optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +markers = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"" files = [ {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, @@ -3754,8 +3827,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3926,7 +3999,7 @@ description = "Accelerated property cache" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -4221,7 +4294,7 @@ description = "Python library for Apache Arrow" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\" or extra == \"duckdb\" or extra == \"pandas\" or extra == \"pyarrow\" or extra == \"ray\"" +markers = "extra == \"pyarrow\" or extra == \"pandas\" or extra == \"duckdb\" or extra == \"ray\" or extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\"" files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -4310,7 +4383,7 @@ files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\"", dev = "platform_python_implementation != \"PyPy\""} +markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\""} [[package]] name = "pydantic" @@ -4772,14 +4845,14 @@ cramjam = "*" name = "pytz" version = "2025.2" description = "World timezone definitions, modern and historical" -optional = true +optional = false python-versions = "*" -groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +groups = ["main", "dev"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\""} [[package]] name = "pywin32" @@ -5394,21 +5467,21 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3fs" -version = "2025.5.1" +version = "2025.7.0" description = "Convenient Filesystem interface over S3" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"s3fs\"" files = [ - {file = "s3fs-2025.5.1-py3-none-any.whl", hash = "sha256:7475e7c40a3a112f17144907ffae50782ab6c03487fe0b45a9c3942bb7a5c606"}, - {file = "s3fs-2025.5.1.tar.gz", hash = "sha256:84beffa231b8ed94f8d667e93387b38351e1c4447aedea5c2c19dd88b7fcb658"}, + {file = "s3fs-2025.7.0-py3-none-any.whl", hash = "sha256:b6b2d3f84b6aa1c2ba5e62e39dd9410cf54f10a2cce1ea6db1ba0d1a6bcce685"}, + {file = "s3fs-2025.7.0.tar.gz", hash = "sha256:5e7f9ec0cad7745155e3eb86fae15b1481fa29946bf5b3a4ce3a60701ce6022d"}, ] [package.dependencies] aiobotocore = ">=2.5.4,<3.0.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2025.5.1" +fsspec = "2025.7.0" [package.extras] awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] @@ -5416,16 +5489,16 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] [[package]] name = "s3transfer" -version = "0.13.0" +version = "0.13.1" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:0148ef34d6dd964d0d8cf4311b2b21c474693e57c2e069ec708ce043d2b527be"}, - {file = "s3transfer-0.13.0.tar.gz", hash = "sha256:f5e6db74eb7776a37208001113ea7aa97695368242b364d73e91c981ac522177"}, + {file = "s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"}, + {file = "s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.37.4,<2.0a.0" @@ -5706,10 +5779,9 @@ test = ["pytest"] name = "sqlalchemy" version = "2.0.41" description = "Database Abstraction Library" -optional = true +optional = false python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"sql-postgres\" or extra == \"sql-sqlite\"" +groups = ["main", "dev"] files = [ {file = "SQLAlchemy-2.0.41-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6854175807af57bdb6425e47adbce7d20a4d79bbfd6f6d6519cd10bb7109a7f8"}, {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05132c906066142103b83d9c250b60508af556982a385d96c4eaa9fb9720ac2b"}, @@ -5769,6 +5841,7 @@ files = [ {file = "sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576"}, {file = "sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9"}, ] +markers = {main = "extra == \"sql-postgres\" or extra == \"sql-sqlite\""} [package.dependencies] greenlet = {version = ">=1", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} @@ -5933,7 +6006,7 @@ description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"daft\" or extra == \"hf\"" +markers = "extra == \"hf\" or extra == \"daft\"" files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -5949,6 +6022,35 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "trino" +version = "0.335.0" +description = "Client for the Trino distributed SQL Engine" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "trino-0.335.0-py3-none-any.whl", hash = "sha256:5c96d89d610ab7712ede532d2eb41beb8627339571bceff6134370a8a496f685"}, + {file = "trino-0.335.0.tar.gz", hash = "sha256:b5e6c928953689be8446cbf7dbb87894cbfe54cf099a85cf461c4206c252cd67"}, +] + +[package.dependencies] +lz4 = "*" +python-dateutil = "*" +pytz = "*" +requests = ">=2.31.0" +sqlalchemy = {version = ">=1.3", optional = true, markers = "extra == \"sqlalchemy\""} +tzlocal = "*" +zstandard = "*" + +[package.extras] +all = ["requests_kerberos", "sqlalchemy (>=1.3)"] +external-authentication-token-cache = ["keyring"] +gssapi = ["krb5 (==0.5.1)", "requests_gssapi"] +kerberos = ["requests_kerberos"] +sqlalchemy = ["sqlalchemy (>=1.3)"] +tests = ["black", "boto3", "httpretty (<1.1)", "isort", "keyring", "krb5 (==0.5.1)", "pre-commit", "pytest", "pytest-runner", "requests_gssapi", "requests_kerberos", "sqlalchemy (>=1.3)", "testcontainers"] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -5981,14 +6083,32 @@ typing-extensions = ">=4.12.0" name = "tzdata" version = "2025.2" description = "Provider of IANA time zone data" -optional = true +optional = false python-versions = ">=2" -groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +groups = ["main", "dev"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] +markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"", dev = "platform_system == \"Windows\""} + +[[package]] +name = "tzlocal" +version = "5.3.1" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d"}, + {file = "tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] [[package]] name = "urllib3" @@ -6029,14 +6149,14 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.32.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, - {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, + {file = "virtualenv-20.32.0-py3-none-any.whl", hash = "sha256:2c310aecb62e5aa1b06103ed7c2977b81e042695de2697d01017ff0f1034af56"}, + {file = "virtualenv-20.32.0.tar.gz", hash = "sha256:886bf75cadfdc964674e6e33eb74d787dff31ca314ceace03ca5810620f4ecf0"}, ] [package.dependencies] @@ -6218,7 +6338,7 @@ description = "Yet another URL library" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, @@ -6358,8 +6478,7 @@ version = "0.23.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"zstandard\"" +groups = ["main", "dev"] files = [ {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, @@ -6459,6 +6578,7 @@ files = [ {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] +markers = {main = "extra == \"zstandard\""} [package.dependencies] cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} @@ -6493,4 +6613,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.1" python-versions = "^3.9.2, !=3.9.7" -content-hash = "5cef3b70e2b74aaa1ba9d9b718c6374dfc137b87d9b4eeaab572604cbf616bd5" +content-hash = "16572e7c065a3d0cd52d9a2b5e069cddcf7f16c77d4d9e77094873d35402b131" diff --git a/pyproject.toml b/pyproject.toml index 6f1b7c779b..0c06a8f8e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,6 +104,7 @@ deptry = ">=0.14,<0.24" docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 mypy-boto3-glue = ">=1.28.18" mypy-boto3-dynamodb = ">=1.28.18" +trino = {extras = ["sqlalchemy"], version = "^0.335.0"} [tool.poetry.group.docs.dependencies] # for mkdocs @@ -326,6 +327,7 @@ markers = [ "s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)", "adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)", "integration: marks integration tests against Apache Spark", + "integration_trino: marks integration tests against Trino", "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)", "benchmark: collection of tests to validate read/write performance before and after a change" ] diff --git a/tests/conftest.py b/tests/conftest.py index 584b6c633a..d2662ec05d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,18 +35,12 @@ from pathlib import Path from random import choice, randint from tempfile import TemporaryDirectory -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Generator, - List, - Optional, -) +from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional import boto3 import pytest from moto import mock_aws +from sqlalchemy import Connection from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.catalog.noop import NoopCatalog @@ -141,6 +135,18 @@ def pytest_addoption(parser: pytest.Parser) -> None: "--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs" ) parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs") + parser.addoption( + "--trino.rest.endpoint", + action="store", + default="trino://test@localhost:8082/warehouse_rest", + help="The Trino REST endpoint URL for tests marked as integration_trino", + ) + parser.addoption( + "--trino.hive.endpoint", + action="store", + default="trino://test@localhost:8082/warehouse_hive", + help="The Trino Hive endpoint URL for tests marked as integration_trino", + ) @pytest.fixture(scope="session") @@ -2436,6 +2442,28 @@ def bound_reference_uuid() -> BoundReference[str]: return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None)) +@pytest.fixture(scope="session") +def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]: + from sqlalchemy import create_engine + + trino_endpoint = request.config.getoption("--trino.hive.endpoint") + engine = create_engine(trino_endpoint) + connection = engine.connect() + yield connection + connection.close() + + +@pytest.fixture(scope="session") +def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]: + from sqlalchemy import create_engine + + trino_endpoint = request.config.getoption("--trino.rest.endpoint") + engine = create_engine(trino_endpoint) + connection = engine.connect() + yield connection + connection.close() + + @pytest.fixture(scope="session") def session_catalog() -> Catalog: return load_catalog( diff --git a/tests/integration/test_register_table.py b/tests/integration/test_register_table.py index c0db2014af..2d2c07d107 100644 --- a/tests/integration/test_register_table.py +++ b/tests/integration/test_register_table.py @@ -15,19 +15,14 @@ # specific language governing permissions and limitations # under the License. import pytest +from sqlalchemy import Connection, inspect from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError, TableAlreadyExistsError from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table -from pyiceberg.types import ( - BooleanType, - DateType, - IntegerType, - NestedField, - StringType, -) +from pyiceberg.types import BooleanType, DateType, IntegerType, NestedField, StringType TABLE_SCHEMA = Schema( NestedField(field_id=1, name="foo", field_type=BooleanType(), required=False), @@ -86,3 +81,36 @@ def test_register_table_existing( # Assert that registering the table again raises TableAlreadyExistsError with pytest.raises(TableAlreadyExistsError): catalog.register_table(("default", "register_table_existing"), metadata_location=tbl.metadata_location) + + +@pytest.mark.integration_trino +@pytest.mark.integration +@pytest.mark.parametrize( + "catalog, trino_conn", + [ + (pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("trino_hive_conn")), + (pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("trino_rest_conn")), + ], +) +def test_register_table_existing_in_trino( + catalog: Catalog, + trino_conn: Connection, +) -> None: + """Test the registration of a table in the catalog that already exists in Trino. + This test verifies that a table can be registered in the catalog with an existing + metadata location and properly reflected in Trino. + """ + namespace = "default" + table_name = "register_table_trino" + identifier = f"{namespace}.{table_name}" + location = f"s3a://warehouse/{namespace}/{table_name}" + catalog.create_namespace_if_not_exists(namespace) + tbl = _create_table(catalog, identifier, 2, location) + assert catalog.table_exists(identifier=identifier) + assert table_name in inspect(trino_conn).get_table_names(schema=namespace) + catalog.drop_table(identifier=identifier) + assert not catalog.table_exists(identifier=identifier) + assert table_name not in inspect(trino_conn).get_table_names(schema=namespace) + catalog.register_table((namespace, table_name), metadata_location=tbl.metadata_location) + assert catalog.table_exists(identifier=identifier) + assert table_name in inspect(trino_conn).get_table_names(schema=namespace) diff --git a/tests/integration/test_rest_catalog.py b/tests/integration/test_rest_catalog.py index 24a8d9f6ef..092428e641 100644 --- a/tests/integration/test_rest_catalog.py +++ b/tests/integration/test_rest_catalog.py @@ -17,6 +17,7 @@ # pylint:disable=redefined-outer-name import pytest +from sqlalchemy import Connection, inspect from pyiceberg.catalog.rest import RestCatalog @@ -61,3 +62,22 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None: catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER) assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER) + + +@pytest.mark.integration +@pytest.mark.integration_trino +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) +def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalog) -> None: + """Verifies that an Iceberg namespace correctly appears as a schema in Trino. + + This test ensures the synchronization between Iceberg's namespace concept and + Trino's schema concept, confirming that after creating a namespace in the Iceberg + catalog, it becomes visible as a schema in the Trino environment. + """ + + if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER): + catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER) + catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER) + + assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER) + assert TEST_NAMESPACE_IDENTIFIER.lower() in inspect(trino_rest_conn).get_schema_names() diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index e63883c1db..e7d80ccba2 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -38,6 +38,8 @@ from pydantic_core import ValidationError from pyspark.sql import SparkSession from pytest_mock.plugin import MockerFixture +from sqlalchemy import Connection +from sqlalchemy.sql.expression import text from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.catalog.hive import HiveCatalog @@ -50,18 +52,8 @@ from pyiceberg.table import TableProperties from pyiceberg.table.refs import MAIN_BRANCH from pyiceberg.table.sorting import SortDirection, SortField, SortOrder -from pyiceberg.transforms import DayTransform, HourTransform, IdentityTransform, Transform -from pyiceberg.types import ( - DateType, - DecimalType, - DoubleType, - IntegerType, - ListType, - LongType, - NestedField, - StringType, - UUIDType, -) +from pyiceberg.transforms import BucketTransform, DayTransform, HourTransform, IdentityTransform, Transform +from pyiceberg.types import DateType, DecimalType, DoubleType, IntegerType, ListType, LongType, NestedField, StringType, UUIDType from utils import _create_table @@ -1865,6 +1857,7 @@ def test_read_write_decimals(session_catalog: Catalog) -> None: assert tbl.scan().to_arrow() == arrow_table +@pytest.mark.skip("UUID BucketTransform is not supported in Spark Iceberg 1.9.2 yet") @pytest.mark.integration @pytest.mark.parametrize( "transform", @@ -1918,6 +1911,64 @@ def test_uuid_partitioning(session_catalog: Catalog, spark: SparkSession, transf assert lhs == rhs +@pytest.mark.integration_trino +@pytest.mark.integration +@pytest.mark.parametrize( + "transform", + [IdentityTransform(), BucketTransform(32)], +) +@pytest.mark.parametrize( + "catalog, trino_conn", + [ + (pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("trino_hive_conn")), + (pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("trino_rest_conn")), + ], +) +def test_uuid_partitioning_with_trino(catalog: Catalog, trino_conn: Connection, transform: Transform) -> None: # type: ignore + identifier = f"default.test_uuid_partitioning_{str(transform).replace('[32]', '')}" + + schema = Schema(NestedField(field_id=1, name="uuid", field_type=UUIDType(), required=True)) + + try: + catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + partition_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=transform, name=f"uuid_{str(transform).replace('[32]', '')}") + ) + + import pyarrow as pa + + arr_table = pa.Table.from_pydict( + { + "uuid": [ + uuid.UUID("00000000-0000-0000-0000-000000000000").bytes, + uuid.UUID("11111111-1111-1111-1111-111111111111").bytes, + ], + }, + schema=pa.schema( + [ + # Uuid not yet supported, so we have to stick with `binary(16)` + # https://github.com/apache/arrow/issues/46468 + pa.field("uuid", pa.binary(16), nullable=False), + ] + ), + ) + + tbl = catalog.create_table( + identifier=identifier, + schema=schema, + partition_spec=partition_spec, + ) + + tbl.append(arr_table) + rows = trino_conn.execute(text(f"SELECT * FROM {identifier}")).fetchall() + lhs = sorted([r[0] for r in rows]) + rhs = sorted([u.as_py() for u in tbl.scan().to_arrow()["uuid"].combine_chunks()]) + assert lhs == rhs + + @pytest.mark.integration def test_avro_compression_codecs(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.test_avro_compression_codecs"