From 1ddc5d3fb127264447a1db5e95b27b45932dba89 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 21 Sep 2025 12:03:27 -0700 Subject: [PATCH 1/2] use latest pyiceberg-core from testpypi --- poetry.lock | 101 ++++++++++++++++++++++++++----------------------- pyproject.toml | 8 +++- 2 files changed, 60 insertions(+), 49 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0bcfe310b4..e37f3ddefa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. [[package]] name = "adlfs" @@ -59,7 +59,7 @@ description = "Happy Eyeballs for asyncio" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -72,7 +72,7 @@ description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6f25e9d274d6abbb15254f76f100c3984d6b9ad6e66263cc60a465dd5c7e48f5"}, {file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b8ec3c1a1c13d24941b5b913607e57b9364e4c0ea69d5363181467492c4b2ba6"}, @@ -202,7 +202,7 @@ description = "aiosignal: a list of registered asynchronous callbacks" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\" or extra == \"ray\")" +markers = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, @@ -254,7 +254,7 @@ description = "Timeout context manager for asyncio programs" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version <= \"3.10\"" +markers = "(extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version <= \"3.10\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -271,7 +271,7 @@ files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] -markers = {main = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\" or extra == \"ray\")"} +markers = {main = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\""} [package.extras] benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] @@ -530,7 +530,7 @@ files = [ {file = "boto3-1.40.18-py3-none-any.whl", hash = "sha256:daa776ba1251a7458c9d6c7627873d0c2460c8e8272d35759065580e9193700a"}, {file = "boto3-1.40.18.tar.gz", hash = "sha256:64301d39adecc154e3e595eaf0d4f28998ef0a5551f1d033aeac51a9e1a688e5"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.40.18,<1.41.0" @@ -551,14 +551,14 @@ files = [ {file = "botocore-1.40.18-py3-none-any.whl", hash = "sha256:57025c46ca00cf8cec25de07a759521bfbfb3036a0f69b272654a354615dc45f"}, {file = "botocore-1.40.18.tar.gz", hash = "sha256:afd69bdadd8c55cc89d69de0799829e555193a352d87867f746e19020271cc0f"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" urllib3 = [ - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, ] [package.extras] @@ -1601,7 +1601,7 @@ description = "A list-like structure which implements collections.abc.MutableSeq optional = true python-versions = ">=3.9" groups = ["main"] -markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\" or extra == \"ray\")" +markers = "extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1800,7 +1800,7 @@ description = "Google API client core library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\"" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "google_api_core-2.25.0-py3-none-any.whl", hash = "sha256:1db79d1281dcf9f3d10023283299ba38f3dc9f639ec41085968fd23e5bcf512e"}, {file = "google_api_core-2.25.0.tar.gz", hash = "sha256:9b548e688702f82a34ed8409fb8a6961166f0b7795032f0be8f48308dff4333a"}, @@ -1810,16 +1810,16 @@ files = [ google-auth = ">=2.14.1,<3.0.0" googleapis-common-protos = ">=1.56.2,<2.0.0" grpcio = [ - {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.0", optional = true, markers = "extra == \"grpc\""}, ] proto-plus = [ - {version = ">=1.22.3,<2.0.0"}, {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""}, + {version = ">=1.22.3,<2.0.0"}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" requests = ">=2.18.0,<3.0.0" @@ -1837,7 +1837,7 @@ description = "Google Authentication Library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra == \"gcp-auth\"" +markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"}, {file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"}, @@ -1919,7 +1919,7 @@ description = "Google Cloud API client core library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\"" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e"}, {file = "google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53"}, @@ -1964,7 +1964,7 @@ description = "A python wrapper of the C library 'Google CRC32C'" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\"" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "google_crc32c-1.7.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:b07d48faf8292b4db7c3d64ab86f950c2e94e93a11fd47271c28ba458e4a0d76"}, {file = "google_crc32c-1.7.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:7cc81b3a2fbd932a4313eb53cc7d9dde424088ca3a0337160f35d91826880c1d"}, @@ -2012,7 +2012,7 @@ description = "Utilities for Google Media Downloads and Resumable Uploads" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\"" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa"}, {file = "google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0"}, @@ -2032,7 +2032,7 @@ description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"bigquery\" or extra == \"gcsfs\" or python_version <= \"3.10\" and (extra == \"gcsfs\" or extra == \"bigquery\")" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -2066,7 +2066,7 @@ description = "Lightweight in-process concurrent programming" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\")" +markers = "(platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\") and python_version <= \"3.13\"" files = [ {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, @@ -2475,7 +2475,7 @@ files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [[package]] name = "joserfc" @@ -3375,7 +3375,7 @@ description = "multidict implementation" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "multidict-6.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8adee3ac041145ffe4488ea73fa0a622b464cc25340d98be76924d0cda8545ff"}, {file = "multidict-6.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b61e98c3e2a861035aaccd207da585bdcacef65fe01d7a0d07478efac005e028"}, @@ -3686,7 +3686,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version < \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version < \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -3742,7 +3742,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version >= \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -3865,7 +3865,7 @@ files = [ {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] -markers = {main = "(extra == \"bigquery\" or extra == \"hf\") and (extra == \"bigquery\" or extra == \"hf\" or extra == \"ray\")"} +markers = {main = "extra == \"ray\" or extra == \"bigquery\" or extra == \"hf\""} [[package]] name = "paginate" @@ -3890,7 +3890,7 @@ description = "Powerful data structures for data analysis, time series, and stat optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +markers = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"" files = [ {file = "pandas-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52bc29a946304c360561974c6542d1dd628ddafa69134a7131fdfd6a5d7a1a35"}, {file = "pandas-2.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:220cc5c35ffaa764dd5bb17cf42df283b5cb7fdf49e10a7b053a06c9cb48ee2b"}, @@ -3938,9 +3938,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4111,7 +4111,7 @@ description = "Accelerated property cache" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -4220,7 +4220,7 @@ description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "python_version < \"3.12\" and (extra == \"gcsfs\" or extra == \"bigquery\") or extra == \"bigquery\" or extra == \"gcsfs\"" +markers = "extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -4239,7 +4239,7 @@ description = "" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "(extra == \"gcsfs\" or extra == \"bigquery\") and (extra == \"gcsfs\" or extra == \"bigquery\" or extra == \"ray\")" +markers = "extra == \"ray\" or extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"}, {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"}, @@ -4406,7 +4406,7 @@ description = "Python library for Apache Arrow" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\" or extra == \"duckdb\" or extra == \"pandas\" or extra == \"pyarrow\" or extra == \"ray\"" +markers = "extra == \"pyarrow\" or extra == \"pandas\" or extra == \"duckdb\" or extra == \"ray\" or extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\"" files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -4462,7 +4462,7 @@ description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra == \"gcp-auth\"" +markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -4475,7 +4475,7 @@ description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra == \"gcp-auth\"" +markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -4648,21 +4648,26 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyiceberg-core" -version = "0.6.0" +version = "0.7.0.dev20250921000225" description = "" optional = true python-versions = "~=3.9" groups = ["main"] markers = "extra == \"pyarrow\" or extra == \"pyiceberg-core\"" files = [ - {file = "pyiceberg_core-0.6.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:2f228a54a2a69912378be18f98ea866bb4a08d265c875856f99cd81f2f7299ba"}, - {file = "pyiceberg_core-0.6.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:edb41a1f182774085b11352a1f44955d561e21453f00973021244471873fbbd7"}, - {file = "pyiceberg_core-0.6.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cf869d225d57254a54bc3778841cffea4193319bc0a849767a15e05e75c9b36"}, - {file = "pyiceberg_core-0.6.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:18c12fe1ac5b4725b673cf0d1d0ab3e9475644ac0dae871a2e9a293c2622f0a8"}, - {file = "pyiceberg_core-0.6.0-cp39-abi3-win_amd64.whl", hash = "sha256:d3249eeae5e1d1f1d2c8bd8d6eced98da002afa7c48c751cb22d8dbd4b091a1e"}, - {file = "pyiceberg_core-0.6.0.tar.gz", hash = "sha256:ce2cac8cf8a85da6e682cec032165fcf387256257971f0f84bc6d50c0941f261"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5217181aa20c8c8c734b5e119187c99bbf7799211b6b2a7e46fdad0271c9f3c3"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d645431435e8884b5abcde1cf104dd30aa1505ec8360f89049f70870990d9f46"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218aa7a8988634d98c8ab6076ba28f3544dd5f827e2a1c375e8b4e1188259cd3"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:adab26ca1c3f989a5b3fa0e5e3b8fa5febefe490c19c0be1f0d17ccc31d27761"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225-cp39-abi3-win_amd64.whl", hash = "sha256:8796a53b4d47f5509c7e309230e2a6713d893bd3e088fd82e34ab0ffee8c8002"}, + {file = "pyiceberg_core-0.7.0.dev20250921000225.tar.gz", hash = "sha256:747633033ec85b4393de7dbeae8d7c62b41b17c498f1cebf7fa7443bf1e8bfd5"}, ] +[package.source] +type = "legacy" +url = "https://test.pypi.org/simple" +reference = "testpypi" + [[package]] name = "pyjwt" version = "2.10.1" @@ -4960,7 +4965,7 @@ description = "World timezone definitions, modern and historical" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +markers = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -5519,7 +5524,7 @@ description = "Pure-Python RSA implementation" optional = true python-versions = "<4,>=3.6" groups = ["main"] -markers = "extra == \"gcsfs\" or extra == \"bigquery\" or extra == \"gcp-auth\"" +markers = "extra == \"gcp-auth\" or extra == \"bigquery\" or extra == \"gcsfs\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -5561,7 +5566,7 @@ files = [ {file = "s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"}, {file = "s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.37.4,<2.0a.0" @@ -5993,7 +5998,7 @@ description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"daft\" or extra == \"hf\"" +markers = "extra == \"hf\" or extra == \"daft\"" files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -6044,7 +6049,7 @@ description = "Provider of IANA time zone data" optional = true python-versions = ">=2" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +markers = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"" files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -6278,7 +6283,7 @@ description = "Yet another URL library" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, @@ -6554,4 +6559,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.1" python-versions = "^3.9.2, !=3.9.7" -content-hash = "e88d9855b6de9f6c28d928e1be2ff05af18c9d58dfe0122ba79d7ccc13c5549f" +content-hash = "f39830e47911170be2b4bf61a66e9aa180b135a8f091e61b2019c9f045b32e7c" diff --git a/pyproject.toml b/pyproject.toml index 1f54c29059..58e22ea07c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ sqlalchemy = { version = "^2.0.18", optional = true } bodo = { version = ">=2025.7.4", optional = true } daft = { version = ">=0.5.0", optional = true } cachetools = ">=5.5,<7.0" -pyiceberg-core = { version = ">=0.5.1,<0.7.0", optional = true } +pyiceberg-core = { version = "==0.7.0.dev20250921000225", optional = true } polars = { version = "^1.21.0", optional = true } thrift-sasl = { version = ">=0.4.3", optional = true } kerberos = {version = "^1.3.1", optional = true} @@ -122,6 +122,12 @@ mkdocs-material = "9.6.20" mkdocs-material-extensions = "1.3.1" mkdocs-section-index = "0.3.10" + +[[tool.poetry.source]] +name = "testpypi" +url = "https://test.pypi.org/simple/" +priority = "supplemental" + [[tool.mypy.overrides]] module = "pytest_mock.*" ignore_missing_imports = true From 35bab2594e5943bd367218211b222bdeb441c069 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 21 Sep 2025 12:04:45 -0700 Subject: [PATCH 2/2] read manifest with pyiceberg-core --- pyiceberg/manifest.py | 57 +++++++++++++++++++++++++++--------- tests/avro/test_reader.py | 2 +- tests/conftest.py | 32 +++++++++++++++----- tests/utils/test_manifest.py | 10 +++---- 4 files changed, 74 insertions(+), 27 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 470c99d602..a7c27650b4 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -853,18 +853,47 @@ def fetch_manifest_entry(self, io: FileIO, discard_deleted: bool = True) -> List Returns: An Iterator of manifest entries. """ - input_file = io.new_input(self.manifest_path) - with AvroFile[ManifestEntry]( - input_file, - MANIFEST_ENTRY_SCHEMAS[DEFAULT_READ_VERSION], - read_types={-1: ManifestEntry, 2: DataFile}, - read_enums={0: ManifestEntryStatus, 101: FileFormat, 134: DataFileContent}, - ) as reader: - return [ - _inherit_from_manifest(entry, self) - for entry in reader - if not discard_deleted or entry.status != ManifestEntryStatus.DELETED - ] + from pyiceberg_core import manifest + + bs = io.new_input(self.manifest_path).open().read() + manifest = manifest.read_manifest_entries(bs) + + # TODO: Don't convert the types + # but this is the easiest for now until we + # have the write part in there as well + def _convert_entry(entry: Any) -> ManifestEntry: + data_file = DataFile( + DataFileContent(entry.data_file.content), + entry.data_file.file_path, + FileFormat(entry.data_file.file_format), + [p.value() if p is not None else None for p in entry.data_file.partition], + entry.data_file.record_count, + entry.data_file.file_size_in_bytes, + entry.data_file.column_sizes, + entry.data_file.value_counts, + entry.data_file.null_value_counts, + entry.data_file.nan_value_counts, + entry.data_file.lower_bounds, + entry.data_file.upper_bounds, + entry.data_file.key_metadata, + entry.data_file.split_offsets, + entry.data_file.equality_ids, + entry.data_file.sort_order_id, + ) + + return ManifestEntry( + ManifestEntryStatus(entry.status), + entry.snapshot_id, + entry.sequence_number, + entry.file_sequence_number, + data_file, + ) + + return [ + _inherit_from_manifest(_convert_entry(entry), self) + for entry in manifest.entries() + if not discard_deleted or entry.status != ManifestEntryStatus.DELETED + ] def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the ManifestFile class.""" @@ -925,12 +954,12 @@ def _inherit_from_manifest(entry: ManifestEntry, manifest: ManifestFile) -> Mani # in v1 tables, the sequence number is not persisted and can be safely defaulted to 0 # in v2 tables, the sequence number should be inherited iff the entry status is ADDED - if entry.sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED): + if entry.sequence_number is None or (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED): entry.sequence_number = manifest.sequence_number # in v1 tables, the file sequence number is not persisted and can be safely defaulted to 0 # in v2 tables, the file sequence number should be inherited iff the entry status is ADDED - if entry.file_sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED): + if entry.file_sequence_number is None or (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED): # Only available in V2, always 0 in V1 entry.file_sequence_number = manifest.sequence_number diff --git a/tests/avro/test_reader.py b/tests/avro/test_reader.py index 82473d11d1..c47b27e581 100644 --- a/tests/avro/test_reader.py +++ b/tests/avro/test_reader.py @@ -112,7 +112,7 @@ def test_read_header(generated_manifest_entry_file: str, iceberg_manifest_entry_ { "field-id": 1001, "default": None, - "name": "tpep_pickup_datetime", + "name": "tpep_pickup_day", "type": ["null", {"type": "int", "logicalType": "date"}], }, ], diff --git a/tests/conftest.py b/tests/conftest.py index 5aff45c1ed..af5690b79a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1178,7 +1178,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: "data_file": { "file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet", "file_format": "PARQUET", - "partition": {"VendorID": 1, "tpep_pickup_datetime": 1925}, + "partition": {"VendorID": 1, "tpep_pickup_day": 1925}, "record_count": 19513, "file_size_in_bytes": 388872, "block_size_in_bytes": 67108864, @@ -1298,7 +1298,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: "data_file": { "file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet", "file_format": "PARQUET", - "partition": {"VendorID": 1, "tpep_pickup_datetime": None}, + "partition": {"VendorID": 1, "tpep_pickup_day": None}, "record_count": 95050, "file_size_in_bytes": 1265950, "block_size_in_bytes": 67108864, @@ -1383,7 +1383,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: {"key": 3, "value": b"\x01\x00\x00\x00\x00\x00\x00\x00"}, {"key": 4, "value": b"\x00\x00\x00\x00"}, {"key": 5, "value": b"\x01\x00\x00\x00"}, - {"key": 6, "value": b"N"}, + {"key": 6, "value": b"\x01\x00\x00\x00"}, {"key": 7, "value": b"\x01\x00\x00\x00"}, {"key": 8, "value": b"\x01\x00\x00\x00"}, {"key": 9, "value": b"\x01\x00\x00\x00"}, @@ -1403,7 +1403,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: {"key": 3, "value": b"\x06\x00\x00\x00\x00\x00\x00\x00"}, {"key": 4, "value": b"\x06\x00\x00\x00"}, {"key": 5, "value": b"c\x00\x00\x00"}, - {"key": 6, "value": b"Y"}, + {"key": 6, "value": b"c\x00\x00\x00"}, {"key": 7, "value": b"\t\x01\x00\x00"}, {"key": 8, "value": b"\t\x01\x00\x00"}, {"key": 9, "value": b"\x04\x00\x00\x00"}, @@ -1677,7 +1677,7 @@ def avro_schema_manifest_entry() -> Dict[str, Any]: { "field-id": 1001, "default": None, - "name": "tpep_pickup_datetime", + "name": "tpep_pickup_day", "type": ["null", {"type": "int", "logicalType": "date"}], }, ], @@ -1863,7 +1863,25 @@ def simple_map() -> MapType: @pytest.fixture(scope="session") def test_schema() -> Schema: return Schema( - NestedField(1, "VendorID", IntegerType(), False), NestedField(2, "tpep_pickup_datetime", TimestampType(), False) + NestedField(1, "VendorID", IntegerType(), False), + NestedField(2, "tpep_pickup_datetime", TimestampType(), False), + NestedField(3, "tpep_dropoff_datetime", TimestampType(), False), + NestedField(4, "passenger_count", LongType(), False), + NestedField(5, "trip_distance", DoubleType(), False), + NestedField(6, "RatecodeID", DoubleType(), False), + NestedField(7, "store_and_fwd_flag", StringType(), False), + NestedField(8, "PULocationID", IntegerType(), False), + NestedField(9, "DOLocationID", IntegerType(), False), + NestedField(10, "payment_type", LongType(), False), + NestedField(11, "fare_amount", DoubleType(), False), + NestedField(12, "extra", DoubleType(), False), + NestedField(13, "mta_tax", DoubleType(), False), + NestedField(14, "tip_amount", DoubleType(), False), + NestedField(15, "tolls_amount", DoubleType(), False), + NestedField(16, "improvement_surcharge", DoubleType(), False), + NestedField(17, "total_amount", DoubleType(), False), + NestedField(18, "congestion_surcharge", DoubleType(), False), + NestedField(19, "Airport_fee", DoubleType(), False), ) @@ -1969,7 +1987,7 @@ def iceberg_manifest_entry_schema() -> Schema: ), NestedField( field_id=1001, - name="tpep_pickup_datetime", + name="tpep_pickup_day", field_type=DateType(), required=False, ), diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py index 7c62b9564c..f146cf0cb6 100644 --- a/tests/utils/test_manifest.py +++ b/tests/utils/test_manifest.py @@ -41,7 +41,7 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table.snapshots import Operation, Snapshot, Summary -from pyiceberg.typedef import Record, TableVersion +from pyiceberg.typedef import TableVersion from pyiceberg.types import IntegerType, NestedField @@ -85,7 +85,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None: == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet" ) assert data_file.file_format == FileFormat.PARQUET - assert repr(data_file.partition) == "Record[1, 1925]" + assert data_file.partition == [1, 1925] assert data_file.record_count == 19513 assert data_file.file_size_in_bytes == 388872 assert data_file.column_sizes == { @@ -184,7 +184,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None: } assert data_file.key_metadata is None assert data_file.split_offsets == [4] - assert data_file.equality_ids is None + assert data_file.equality_ids == [] assert data_file.sort_order_id == 0 @@ -422,7 +422,7 @@ def test_write_manifest( == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet" ) assert data_file.file_format == FileFormat.PARQUET - assert data_file.partition == Record(1, 1925) + assert data_file.partition == [1, 1925] assert data_file.record_count == 19513 assert data_file.file_size_in_bytes == 388872 assert data_file.column_sizes == { @@ -521,7 +521,7 @@ def test_write_manifest( } assert data_file.key_metadata is None assert data_file.split_offsets == [4] - assert data_file.equality_ids is None + assert data_file.equality_ids == [] assert data_file.sort_order_id == 0