diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..c6abb4b5 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: 'github-actions' # See documentation for possible values + directory: '/' # Location of package manifests + schedule: + interval: 'weekly' + + - package-ecosystem: 'cargo' # See documentation for possible values + directory: '/' # Location of package manifests + schedule: + interval: 'weekly' diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 79584efd..e7abb023 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -13,10 +13,13 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: dtolnay/rust-toolchain@stable + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install Protoc uses: arduino/setup-protoc@v3 with: @@ -25,132 +28,125 @@ jobs: - run: cargo clippy --all-features -- -D warnings build: - name: Build - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - # Putting this into a GitHub Actions matrix will run a separate job per matrix item, whereas in theory - # this can re-use the existing build cache to go faster. - - name: Build without default features - run: cargo check --no-default-features - - - name: Build with only duckdb - run: cargo check --no-default-features --features duckdb - - - name: Build with only postgres - run: cargo check --no-default-features --features postgres - - - name: Build with only sqlite - run: cargo check --no-default-features --features sqlite - - - name: Build with only mysql - run: cargo check --no-default-features --features mysql - - integration-test-mysql: - name: Tests mysql + name: Build (${{ matrix.features }}) runs-on: ubuntu-latest - env: - MYSQL_DOCKER_IMAGE: public.ecr.aws/ubuntu/mysql:8.0-22.04_beta + strategy: + matrix: + features: + - 'no-default-features' + - 'clickhouse' + - 'duckdb' + - 'postgres' + - 'sqlite' + - 'mysql' + - 'flight' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: dtolnay/rust-toolchain@stable - - name: Pull the MySQL images - run: | - docker pull ${{ env.MYSQL_DOCKER_IMAGE }} + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 - - uses: ./.github/actions/setup-integration-test - - - name: Run tests - run: cargo test --features mysql - - integration-test-postgres: - name: Tests postgres + - name: Build with ${{ matrix.features }} + run: | + if [ "${{ matrix.features }}" = "no-default-features" ]; then + cargo check --no-default-features + else + cargo check --no-default-features --features ${{ matrix.features }} + fi + + integration-test: + name: Tests runs-on: ubuntu-latest env: - PG_DOCKER_IMAGE: public.ecr.aws/docker/library/postgres:latest + PG_DOCKER_IMAGE: ghcr.io/cloudnative-pg/postgresql:16-bookworm + MYSQL_DOCKER_IMAGE: public.ecr.aws/ubuntu/mysql:8.0-22.04_beta steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: dtolnay/rust-toolchain@stable - - uses: ./.github/actions/setup-integration-test + - name: Cache Rust 
dependencies + uses: Swatinem/rust-cache@v2 - - name: Pull the Postgres images + - name: Pull the Postgres/MySQL images run: | docker pull ${{ env.PG_DOCKER_IMAGE }} + docker pull ${{ env.MYSQL_DOCKER_IMAGE }} - - name: Run tests - run: cargo test --features postgres - - integration-test-sqlite: - name: Tests sqlite - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - - uses: ./.github/actions/setup-integration-test - - - name: Run tests - run: cargo test --features sqlite - - integration-test-duckdb: - name: Tests duckdb - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - - uses: ./.github/actions/setup-integration-test + - name: Free Disk Space + run: | + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + sudo rm -rf \ + /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ + /usr/lib/jvm || true + echo "some directories deleted" + sudo apt install aptitude -y >/dev/null 2>&1 + sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ + esl-erlang firefox gfortran-8 gfortran-9 google-chrome-stable \ + google-cloud-sdk imagemagick \ + libmagickcore-dev libmagickwand-dev libmagic-dev ant ant-optional kubectl \ + mercurial apt-transport-https mono-complete libmysqlclient \ + yarn chrpath libssl-dev libxft-dev \ + libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ + snmp pollinate libpq-dev postgresql-client powershell ruby-full \ + sphinxsearch subversion mongodb-org azure-cli microsoft-edge-stable \ + -y -f >/dev/null 2>&1 + sudo aptitude purge google-cloud-sdk -f -y >/dev/null 2>&1 + sudo aptitude purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true + sudo apt purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true + sudo aptitude purge '~n ^php' -f -y >/dev/null 2>&1 + sudo aptitude purge '~n ^dotnet' -f -y >/dev/null 2>&1 + sudo apt-get autoremove -y >/dev/null 2>&1 + sudo apt-get autoclean -y >/dev/null 2>&1 + echo "some packages purged" + df -h + + - name: Install ODBC & Sqlite + run: | + sudo apt-get install -y unixodbc-dev + sudo apt-get install -y libsqlite3-dev - name: Run tests - run: cargo test --features duckdb + run: make test - integration-test-flight: - name: Tests flight + python-integration-test: + name: Python Tests runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: dtolnay/rust-toolchain@stable - - uses: ./.github/actions/setup-integration-test - - - name: Run tests - run: cargo test --features flight - - integration-test-mongodb: - name: Tests mongoDB - runs-on: ubuntu-latest - - env: - MONGODB_DOCKER_IMAGE: public.ecr.aws/docker/library/mongo:7 - - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: | + . 
+ python - - uses: ./.github/actions/setup-integration-test + - uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + cache-dependency-glob: 'python/pyproject.toml' - - name: Pull the MongoDB images + - name: Install ODBC, Sqlite and Roapi run: | - docker pull ${{ env.MONGODB_DOCKER_IMAGE }} + sudo apt-get install -y unixodbc-dev libsqliteodbc + sudo apt-get install -y libsqlite3-dev + cargo install --locked --git https://github.com/roapi/roapi --branch main --bins roapi - - name: Run tests - run: cargo test --features mongodb + - name: Build Python package and run tests + run: | + cd python + uv sync --dev --no-install-package datafusion + uv run --no-project maturin develop --uv --release + cd python/tests + uv run --no-project pytest -v . diff --git a/.gitignore b/.gitignore index 8fb64652..bc7a0a21 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # will have compiled files and executables debug/ target/ +.cargo # These are backup files generated by rustfmt **/*.rs.bk @@ -17,3 +18,4 @@ target/ .DS_Store .idea +.vscode diff --git a/Cargo.lock b/Cargo.lock index d32e1eec..65518e2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,54 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "abi_stable" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" +dependencies = [ + "abi_stable_derive", + "abi_stable_shared", + "const_panic", + "core_extensions", + "crossbeam-channel", + "generational-arena", + "libloading 0.7.4", + "lock_api", + "parking_lot", + "paste", + "repr_offset", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "abi_stable_derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" +dependencies = [ + "abi_stable_shared", + "as_derive_utils", + "core_extensions", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "typed-arena", +] + +[[package]] +name = "abi_stable_shared" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" +dependencies = [ + "core_extensions", +] + [[package]] name = "adler2" version = "2.0.1" @@ -63,6 +111,33 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android-activity" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef6978589202a00cd7e118380c448a08b6ed394c3a8df3a430d0898e3a42d046" +dependencies = [ + "android-properties", + "bitflags 2.10.0", + "cc", + "cesu8", + "jni", + "jni-sys", + "libc", + "log", + "ndk", + "ndk-context", + "ndk-sys", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "android-properties" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7eb209b1518d6bb87b283c20095f5228ecda460da70b44f0802523dea6da04" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -163,6 +238,7 @@ dependencies = [ "arrow-ipc", "arrow-json", "arrow-ord", + "arrow-pyarrow", "arrow-row", "arrow-schema", "arrow-select", @@ -274,7 +350,7 @@ dependencies = [ "futures", "once_cell", "paste", - "prost", + "prost 0.13.5", "prost-types", "tonic", ] @@ -314,6 +390,20 @@ dependencies 
= [ "simdutf8", ] +[[package]] +name = "arrow-odbc" +version = "20.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df789a17037625211fc85f373c339cd12b0030d9eebac532f9d3c7912ccd65e1" +dependencies = [ + "arrow", + "atoi", + "chrono", + "log", + "odbc-api 17.0.0", + "thiserror 2.0.17", +] + [[package]] name = "arrow-ord" version = "55.2.0" @@ -326,6 +416,17 @@ dependencies = [ "arrow-select", ] +[[package]] +name = "arrow-pyarrow" +version = "55.2.0" +source = "git+https://github.com/spiceai/arrow-rs.git?rev=53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2#53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" +dependencies = [ + "arrow-array", + "arrow-data", + "arrow-schema", + "pyo3", +] + [[package]] name = "arrow-row" version = "55.2.0" @@ -343,7 +444,7 @@ name = "arrow-schema" version = "55.2.0" source = "git+https://github.com/spiceai/arrow-rs.git?rev=53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2#53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "serde", "serde_json", ] @@ -377,6 +478,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "as_derive_utils" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" +dependencies = [ + "core_extensions", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "async-compression" version = "0.4.19" @@ -394,6 +507,15 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-ffi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" +dependencies = [ + "abi_stable", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -501,12 +623,6 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -515,11 +631,10 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bb8" -version = "0.8.6" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89aabfae550a5c44b43ab941844ffcd2e993cb6900b342debf59e9ea74acdb8" +checksum = "212d8b8e1a22743d9241575c6ba822cf9c8fef34771c86ab7e477a4fbfd254e5" dependencies = [ - "async-trait", "futures-util", "parking_lot", "tokio", @@ -527,11 +642,10 @@ dependencies = [ [[package]] name = "bb8-postgres" -version = "0.8.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ac82c42eb30889b5c4ee4763a24b8c566518171ebea648cd7e3bc532c60680" +checksum = "e570e6557cd0f88d28d32afa76644873271a70dc22656df565b2021c4036aa9c" dependencies = [ - "async-trait", "bb8", "tokio", "tokio-postgres", @@ -539,9 +653,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -558,9 +672,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 
[[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bitvec" @@ -605,11 +719,20 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block2" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c132eebf10f5cad5289222520a4a058514204aed6d791f1cf4fe8088b82d15f" +dependencies = [ + "objc2", +] + [[package]] name = "bollard" -version = "0.16.1" +version = "0.19.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aed08d3adb6ebe0eff737115056652670ae290f177759aac19c30456135f94c" +checksum = "ec7646ee90964aa59e9f832a67182791396a19a5b1d76eb17599a8310a7e2e09" dependencies = [ "base64 0.22.1", "bollard-stubs", @@ -622,7 +745,7 @@ dependencies = [ "hyper", "hyper-named-pipe", "hyper-util", - "hyperlocal-next", + "hyperlocal", "log", "pin-project-lite", "serde", @@ -630,7 +753,7 @@ dependencies = [ "serde_json", "serde_repr", "serde_urlencoded", - "thiserror 1.0.69", + "thiserror 2.0.17", "tokio", "tokio-util", "tower-service", @@ -640,11 +763,12 @@ dependencies = [ [[package]] name = "bollard-stubs" -version = "1.44.0-rc.2" +version = "1.49.1-rc.28.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "709d9aa1c37abb89d40f19f5d0ad6f0d88cb1581264e571c9350fc5bb89cf1c5" +checksum = "5731fe885755e92beff1950774068e0cae67ea6ec7587381536fca84f1779623" dependencies = [ "serde", + "serde_json", "serde_repr", "serde_with", ] @@ -716,6 +840,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", +] + [[package]] name = "btoi" version = "0.4.3" @@ -804,6 +937,20 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "calloop" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b99da2f8558ca23c71f4fd15dc57c906239752dd27ff3c00a1d56b685b7cbfec" +dependencies = [ + "bitflags 2.10.0", + "log", + "polling", + "rustix 0.38.44", + "slab", + "thiserror 1.0.69", +] + [[package]] name = "cast" version = "0.3.0" @@ -822,6 +969,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.4" @@ -859,12 +1012,46 @@ dependencies = [ ] [[package]] -name = "cmake" -version = "0.1.54" +name = "cityhash-rs" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93a719913643003b84bd13022b4b7e703c09342cd03b679c4641c7d2e50dc34d" + +[[package]] +name = "clickhouse" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +checksum = "9a9a81a1dffadd762ee662635ce409232258ce9beebd7cc0fa227df0b5e7efc0" dependencies = [ - "cc", + "bstr", + "bytes", + "cityhash-rs", + "clickhouse-derive", + "futures", + "futures-channel", + "http-body-util", + "hyper", + "hyper-util", + "lz4_flex", + "replace_with", + "sealed", + "serde", + "static_assertions", + "thiserror 1.0.69", + "tokio", + 
"url", +] + +[[package]] +name = "clickhouse-derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d70f3e2893f7d3e017eeacdc9a708fbc29a10488e3ebca21f9df6a5d2b616dbb" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.107", ] [[package]] @@ -873,6 +1060,16 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "comfy-table" version = "7.2.1" @@ -883,6 +1080,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "console" version = "0.15.11" @@ -915,6 +1121,15 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_panic" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" +dependencies = [ + "typewit", +] + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -937,6 +1152,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -944,60 +1169,67 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] -name = "cpufeatures" -version = "0.2.17" +name = "core-graphics" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "core-graphics-types", + "foreign-types 0.5.0", "libc", ] [[package]] -name = "crc32fast" -version = "1.5.0" +name = "core-graphics-types" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" dependencies = [ - "cfg-if", + "bitflags 1.3.2", + "core-foundation 0.9.4", + "libc", ] [[package]] -name = "crossbeam" -version = "0.8.4" +name = "core_extensions" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +checksum = "42bb5e5d0269fd4f739ea6cedaf29c16d81c27a7ce7582008e90eb50dcd57003" dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", + "core_extensions_proc_macros", ] [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "core_extensions_proc_macros" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ - "crossbeam-utils", + "libc", ] [[package]] -name = "crossbeam-deque" -version = "0.8.6" +name = "crc32fast" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", + "cfg-if", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.18" +name = "crossbeam-channel" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -1054,6 +1286,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "cursor-icon" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f" + [[package]] name = "darling" version = "0.20.11" @@ -1158,7 +1396,7 @@ dependencies = [ "chrono", "datafusion-catalog", "datafusion-catalog-listing", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-csv", @@ -1166,7 +1404,7 @@ dependencies = [ "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", - "datafusion-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", @@ -1174,7 +1412,7 @@ dependencies = [ "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-session", @@ -1182,13 +1420,14 @@ dependencies = [ "flate2", "futures", "hex", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", "parquet", "rand 0.9.2", "regex", + "serde", "sqlparser", "tempfile", "tokio", @@ -1206,7 +1445,7 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", @@ -1216,7 +1455,7 @@ dependencies = [ "datafusion-session", "datafusion-sql", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1231,12 +1470,12 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-datasource", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", 
- "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1245,6 +1484,28 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" +dependencies = [ + "ahash 0.8.12", + "arrow", + "arrow-ipc", + "base64 0.22.1", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.0", + "libc", + "log", + "paste", + "sqlparser", + "tokio", + "web-time", +] + [[package]] name = "datafusion-common" version = "49.0.2" @@ -1264,6 +1525,7 @@ dependencies = [ "object_store", "parquet", "paste", + "pyo3", "recursive", "sqlparser", "tokio", @@ -1291,18 +1553,18 @@ dependencies = [ "bytes", "bzip2 0.6.1", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", "datafusion-session", "flate2", "futures", "glob", - "itertools 0.14.0", + "itertools", "log", "object_store", "parquet", @@ -1324,13 +1586,13 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1348,13 +1610,13 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1372,21 +1634,21 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", "hex", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1407,7 +1669,7 @@ source = 
"git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "arrow", "dashmap", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", "futures", "log", @@ -1426,12 +1688,12 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-functions-aggregate-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "indexmap 2.12.0", "paste", "recursive", @@ -1439,15 +1701,28 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" +dependencies = [ + "arrow", + "datafusion-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.12.0", + "itertools", + "paste", +] + [[package]] name = "datafusion-expr-common" version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "indexmap 2.12.0", - "itertools 0.14.0", + "itertools", "paste", ] @@ -1463,6 +1738,28 @@ dependencies = [ "futures", ] +[[package]] +name = "datafusion-ffi" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec21805d9df2d834e4c6ddfbf8a1bed2bd460b89b01686fe0dcd1cee06d0b60f" +dependencies = [ + "abi_stable", + "arrow", + "arrow-schema", + "async-ffi", + "async-trait", + "datafusion", + "datafusion-functions-aggregate-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-proto", + "datafusion-proto-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "log", + "prost 0.13.5", + "semver", + "tokio", +] + [[package]] name = "datafusion-functions" version = "49.0.2" @@ -1474,14 +1771,14 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-doc", "datafusion-execution", "datafusion-expr", - "datafusion-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-macros", "hex", - "itertools 0.14.0", + "itertools", "log", "md-5", "rand 0.9.2", @@ -1498,14 +1795,14 @@ source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "ahash 0.8.12", "arrow", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", 
"datafusion-doc", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate-common", + "datafusion-functions-aggregate-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-macros", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "half", "log", "paste", @@ -1514,32 +1811,45 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "49.0.2" -source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" dependencies = [ "ahash 0.8.12", "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "datafusion-functions-nested" +name = "datafusion-functions-aggregate-common" +version = "49.0.2" +source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" +dependencies = [ + "ahash 0.8.12", + "arrow", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", +] + +[[package]] +name = "datafusion-functions-nested" version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ "arrow", "arrow-ord", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", + "datafusion-functions-aggregate-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-macros", - "datafusion-physical-expr-common", - "itertools 0.14.0", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "itertools", "log", "paste", ] @@ -1552,7 +1862,7 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", "datafusion-physical-plan", "parking_lot", @@ -1565,13 +1875,13 @@ version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 49.0.2 
(git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", "datafusion-macros", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "log", "paste", ] @@ -1581,8 +1891,8 @@ name = "datafusion-functions-window-common" version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", ] [[package]] @@ -1602,12 +1912,12 @@ source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "arrow", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", - "datafusion-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-expr", "indexmap 2.12.0", - "itertools 0.14.0", + "itertools", "log", "recursive", "regex", @@ -1621,20 +1931,34 @@ source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "ahash 0.8.12", "arrow", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-functions-aggregate-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "half", "hashbrown 0.14.5", "indexmap 2.12.0", - "itertools 0.14.0", + "itertools", "log", "paste", "petgraph", ] +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" +dependencies = [ + "ahash 0.8.12", + "arrow", + "datafusion-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "hashbrown 0.14.5", + "itertools", +] + [[package]] name = "datafusion-physical-expr-common" version = "49.0.2" @@ -1642,10 +1966,10 @@ source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "ahash 0.8.12", "arrow", - "datafusion-common", - "datafusion-expr-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "hashbrown 0.14.5", - "itertools 
0.14.0", + "itertools", ] [[package]] @@ -1654,15 +1978,15 @@ version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-execution", "datafusion-expr", - "datafusion-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", "datafusion-pruning", - "itertools 0.14.0", + "itertools", "log", "recursive", ] @@ -1678,18 +2002,18 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-functions-window-common", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "futures", "half", "hashbrown 0.14.5", "indexmap 2.12.0", - "itertools 0.14.0", + "itertools", "log", "parking_lot", "pin-project-lite", @@ -1704,11 +2028,22 @@ dependencies = [ "arrow", "chrono", "datafusion", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", - "datafusion-proto-common", + "datafusion-proto-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "object_store", - "prost", + "prost 0.13.5", +] + +[[package]] +name = "datafusion-proto-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ec788be522806740ad6372c0a2f7e45fb37cb37f786d9b77933add49cdd058f" +dependencies = [ + "arrow", + "datafusion-common 49.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "prost 0.13.5", ] [[package]] @@ -1717,8 +2052,8 @@ version = "49.0.2" source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9#912eebec159e037c7c233aae35c090071675d5a9" dependencies = [ "arrow", - "datafusion-common", - "prost", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", + "prost 0.13.5", ] [[package]] @@ -1728,13 +2063,13 @@ source = "git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c23 dependencies = [ "arrow", "arrow-schema", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-datasource", - "datafusion-expr-common", + "datafusion-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-physical-plan", - "itertools 0.14.0", + "itertools", "log", ] @@ -1746,7 
+2081,7 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", @@ -1754,7 +2089,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-sql", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1769,7 +2104,7 @@ dependencies = [ "arrow", "bigdecimal", "chrono", - "datafusion-common", + "datafusion-common 49.0.2 (git+https://github.com/spiceai/datafusion.git?rev=912eebec159e037c7c233aae35c090071675d5a9)", "datafusion-expr", "indexmap 2.12.0", "log", @@ -1780,13 +2115,15 @@ dependencies = [ [[package]] name = "datafusion-table-providers" -version = "0.1.0" +version = "0.8.1" dependencies = [ "anyhow", "arrow", "arrow-array", "arrow-flight", + "arrow-ipc", "arrow-json", + "arrow-odbc", "arrow-schema", "async-stream", "async-trait", @@ -1794,12 +2131,13 @@ dependencies = [ "bb8", "bb8-postgres", "bigdecimal", - "bitflags 2.9.4", "bollard", "byte-unit", "byteorder", "bytes", "chrono", + "clickhouse", + "dashmap", "datafusion", "datafusion-expr", "datafusion-federation", @@ -1814,17 +2152,21 @@ dependencies = [ "geo-types", "geozero", "insta", - "itertools 0.13.0", + "itertools", + "libduckdb-sys 1.3.0", "mongodb", "mysql_async", "native-tls", "num-bigint", "num-traits", + "odbc-api 19.1.0", "pem", "postgres-native-tls", - "prost", + "prost 0.13.5", + "prost 0.14.1", "r2d2", - "rand 0.8.5", + "rand 0.9.2", + "regex", "reqwest", "rstest", "rusqlite", @@ -1833,6 +2175,7 @@ dependencies = [ "secrecy", "serde", "serde_json", + "sha2", "snafu", "tempfile", "test-log", @@ -1849,6 +2192,20 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-table-providers-python" +version = "0.8.1" +dependencies = [ + "arrow", + "arrow-flight", + "datafusion", + "datafusion-ffi", + "datafusion-table-providers", + "duckdb", + "pyo3", + "tokio", +] + [[package]] name = "deranged" version = "0.5.4" @@ -1905,6 +2262,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "dispatch" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd0c93bb4b0c6d9b77f4435b0ae98c24d17f1c45b2ff844c6151a07256ca923b" + [[package]] name = "displaydoc" version = "0.2.5" @@ -1916,6 +2279,21 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "dlib" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" +dependencies = [ + "libloading 0.8.9", +] + +[[package]] +name = "dpi" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b14ccef22fc6f5a8f4d7d768562a182c04ce9a3b3157b91390b52ddfdf1a76" + [[package]] name = "duckdb" version = "1.3.2" @@ -1926,7 +2304,7 @@ dependencies = [ "fallible-iterator 0.3.0", "fallible-streaming-iterator", "hashlink 0.10.0", - "libduckdb-sys", + "libduckdb-sys 1.3.2", "num", "num-integer", "r2d2", @@ -2065,7 +2443,7 @@ version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "rustc_version", ] @@ -2099,7 +2477,28 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" dependencies = 
[ - "foreign-types-shared", + "foreign-types-shared 0.1.1", +] + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared 0.3.1", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", ] [[package]] @@ -2108,6 +2507,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2233,6 +2638,15 @@ dependencies = [ "slab", ] +[[package]] +name = "generational-arena" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" +dependencies = [ + "cfg-if", +] + [[package]] name = "generic-array" version = "0.14.9" @@ -2268,9 +2682,9 @@ dependencies = [ [[package]] name = "geozero" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cd8fb67347739a057fd607b6d8b43ba4ed93619ed84b8f429fa3296f8ae504c" +checksum = "e5f28f34864745eb2f123c990c6ffd92c1584bd39439b3f27ff2a0f4ea5b309b" dependencies = [ "geo-types", "geojson", @@ -2399,20 +2813,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "hdrhistogram" -version = "7.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" -dependencies = [ - "base64 0.21.7", - "byteorder", - "crossbeam-channel", - "flate2", - "nom", - "num-traits", -] - [[package]] name = "heck" version = "0.4.1" @@ -2425,6 +2825,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -2647,10 +3053,10 @@ dependencies = [ ] [[package]] -name = "hyperlocal-next" -version = "0.9.0" +name = "hyperlocal" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf569d43fa9848e510358c07b80f4adf34084ddc28c6a4a651ee8474c070dcc" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ "hex", "http-body-util", @@ -2831,6 +3237,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + [[package]] name = "inherent" version = "1.0.13" @@ -2894,15 +3306,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] 
-name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.14.0" @@ -2918,6 +3321,28 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "jobserver" version = "0.1.34" @@ -3022,6 +3447,21 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libduckdb-sys" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25d3f1defe457d1ac0fbaef0fe6926953cb33419dd3c8dbac53882d020bee697" +dependencies = [ + "autocfg", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] + [[package]] name = "libduckdb-sys" version = "1.3.2" @@ -3036,6 +3476,26 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libm" version = "0.2.15" @@ -3048,17 +3508,18 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "libc", - "redox_syscall", + "redox_syscall 0.5.18", ] [[package]] name = "libsqlite3-sys" -version = "0.28.0" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" dependencies = [ + "cc", "pkg-config", "vcpkg", ] @@ -3089,6 +3550,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -3118,9 +3585,9 @@ checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "lru" -version = "0.12.5" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ "hashbrown 0.15.5", ] @@ -3234,16 +3701,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] -name = "mime" -version = "0.3.17" +name = "memoffset" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] [[package]] -name = "minimal-lexical" -version = "0.2.1" +name = "mime" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" @@ -3369,25 +3839,23 @@ dependencies = [ [[package]] name = "mysql_async" -version = "0.35.1" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d14cf024116ba8fef4a7fec5abf0bd5de89b9fb29a7e55818a119ac5ec745077" +checksum = "277ce2f2459b2af4cc6d0a0b7892381f80800832f57c533f03e2845f4ea331ea" dependencies = [ "bytes", - "crossbeam", + "crossbeam-queue", "flate2", "futures-core", "futures-sink", "futures-util", - "hdrhistogram", "keyed_priority_queue", "lru", "mysql_common", "native-tls", "pem", "percent-encoding", - "pin-project", - "rand 0.8.5", + "rand 0.9.2", "serde", "serde_json", "socket2 0.5.10", @@ -3401,37 +3869,32 @@ dependencies = [ [[package]] name = "mysql_common" -version = "0.34.1" +version = "0.35.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34a9141e735d5bb02414a7ac03add09522466d4db65bdd827069f76ae0850e58" +checksum = "fbb9f371618ce723f095c61fbcdc36e8936956d2b62832f9c7648689b338e052" dependencies = [ "base64 0.22.1", "bigdecimal", - "bitflags 2.9.4", + "bitflags 2.10.0", "btoi", "byteorder", "bytes", - "cc", "chrono", - "cmake", "crc32fast", "flate2", - "lazy_static", + "getrandom 0.3.4", "mysql-common-derive", "num-bigint", "num-traits", - "rand 0.8.5", "regex", "saturating", "serde", "serde_json", "sha1", "sha2", - "subprocess", - "thiserror 1.0.69", + "thiserror 2.0.17", "time", "uuid", - "zstd", ] [[package]] @@ -3446,19 +3909,39 @@ dependencies = [ "openssl-probe", "openssl-sys", "schannel", - "security-framework", + "security-framework 2.11.1", "security-framework-sys", "tempfile", ] [[package]] -name = "nom" -version = "7.1.3" +name = "ndk" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4" dependencies = [ - "memchr", - "minimal-lexical", + "bitflags 2.10.0", + "jni-sys", + "log", + "ndk-sys", + "num_enum", + "raw-window-handle", + "thiserror 1.0.69", +] + +[[package]] +name = "ndk-context" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" + +[[package]] +name = "ndk-sys" +version = "0.6.0+11769913" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873" +dependencies = [ + 
"jni-sys", ] [[package]] @@ -3550,6 +4033,231 @@ dependencies = [ "libm", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "objc-sys" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb91bdd390c7ce1a8607f35f3ca7151b65afc0ff5ff3b34fa350f7d7c7e4310" + +[[package]] +name = "objc2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a785d4eeff09c14c487497c162e92766fbb3e4059a71840cecc03d9a50b804" +dependencies = [ + "objc-sys", + "objc2-encode", +] + +[[package]] +name = "objc2-app-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" +dependencies = [ + "bitflags 2.10.0", + "block2", + "libc", + "objc2", + "objc2-core-data", + "objc2-core-image", + "objc2-foundation", + "objc2-quartz-core", +] + +[[package]] +name = "objc2-cloud-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-core-location", + "objc2-foundation", +] + +[[package]] +name = "objc2-contacts" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" +dependencies = [ + "block2", + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-data" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-image" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" +dependencies = [ + "block2", + "objc2", + "objc2-foundation", + "objc2-metal", +] + +[[package]] +name = "objc2-core-location" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "000cfee34e683244f284252ee206a27953279d370e309649dc3ee317b37e5781" +dependencies = [ + "block2", + "objc2", + "objc2-contacts", + "objc2-foundation", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" +dependencies = [ + "bitflags 2.10.0", + "block2", + "dispatch", + "libc", + "objc2", +] + +[[package]] +name = "objc2-link-presentation" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398" +dependencies = [ + "block2", + "objc2", + "objc2-app-kit", + "objc2-foundation", +] + +[[package]] +name = "objc2-metal" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-quartz-core" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-foundation", + "objc2-metal", +] + +[[package]] +name = "objc2-symbols" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc" +dependencies = [ + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-ui-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-cloud-kit", + "objc2-core-data", + "objc2-core-image", + "objc2-core-location", + "objc2-foundation", + "objc2-link-presentation", + "objc2-quartz-core", + "objc2-symbols", + "objc2-uniform-type-identifiers", + "objc2-user-notifications", +] + +[[package]] +name = "objc2-uniform-type-identifiers" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" +dependencies = [ + "block2", + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-user-notifications" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3" +dependencies = [ + "bitflags 2.10.0", + "block2", + "objc2", + "objc2-core-location", + "objc2-foundation", +] + [[package]] name = "object_store" version = "0.12.4" @@ -3562,7 +4270,7 @@ dependencies = [ "futures", "http", "humantime", - "itertools 0.14.0", + "itertools", "parking_lot", "percent-encoding", "thiserror 2.0.17", @@ -3574,6 +4282,46 @@ dependencies = [ "web-time", ] +[[package]] +name = "odbc-api" +version = "17.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fb19e1cee47949af1445f6e46f02c6b5dc02bff176fe6d5ab0c6e5a47bcbf8" +dependencies = [ + "atoi", + "log", + "odbc-sys 0.25.1", + "thiserror 2.0.17", + "widestring", + "winit", +] + +[[package]] +name = "odbc-api" +version = "19.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f017d3949731e436bc1bb9a1fbc34197c2f39c588cdcb60d21adb1f8dd3b8514" +dependencies = [ + "atoi", + "log", + "odbc-sys 0.27.3", + "thiserror 2.0.17", + "widestring", + "winit", +] + +[[package]] +name = "odbc-sys" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecdb20f7c165083ad1bc9f55122f677725e257716a5bc83e5413d5654b7d6f1" + +[[package]] +name = "odbc-sys" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd7e3c4b5b7bbd3e7bd01dc00cb4614f2445591cad1f6f18a7e16d7f98c392e9" + [[package]] name = "once_cell" version = "1.21.3" @@ -3592,9 +4340,9 @@ version = "0.10.74" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "24ad14dd45412269e1a30f52ad8f0664f0f4f4a89ee8fe28c3b3527021ebb654" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", - "foreign-types", + "foreign-types 0.3.2", "libc", "once_cell", "openssl-macros", @@ -3630,6 +4378,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "orbclient" +version = "0.3.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba0b26cec2e24f08ed8bb31519a9333140a6599b867dac464bb150bdb796fd43" +dependencies = [ + "libredox", +] + [[package]] name = "ordered-float" version = "2.10.1" @@ -3657,7 +4414,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link 0.2.1", ] @@ -3811,10 +4568,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "pkg-config" -version = "0.3.32" +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix 1.1.2", + "windows-sys 0.61.2", +] + +[[package]] +name = "portable-atomic" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "postgres-native-tls" @@ -3933,7 +4710,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +dependencies = [ + "bytes", + "prost-derive 0.14.1", ] [[package]] @@ -3943,7 +4730,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "prost-derive" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +dependencies = [ + "anyhow", + "itertools", "proc-macro2", "quote", "syn 2.0.107", @@ -3955,7 +4755,7 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost", + "prost 0.13.5", ] [[package]] @@ -3987,6 +4787,69 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "pyo3" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + 
"portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.107", +] + [[package]] name = "quote" version = "1.0.41" @@ -4078,6 +4941,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + [[package]] name = "recursive" version = "0.1.1" @@ -4098,13 +4967,22 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] @@ -4171,6 +5049,21 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "replace_with" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51743d3e274e2b18df81c4dc6caf8a5b8e15dbe799e0dca05c7617380094e884" + +[[package]] +name = "repr_offset" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" +dependencies = [ + "tstr", +] + [[package]] name = "reqwest" version = "0.12.24" @@ -4262,21 +5155,20 @@ dependencies = [ [[package]] name = "rstest" -version = "0.22.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" dependencies = [ - "futures", "futures-timer", + "futures-util", "rstest_macros", - "rustc_version", ] [[package]] name = "rstest_macros" -version = "0.22.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" dependencies = [ "cfg-if", "glob", @@ -4292,11 +5184,11 @@ dependencies = [ [[package]] name = "rusqlite" -version = "0.31.0" 
+version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "fallible-iterator 0.3.0", "fallible-streaming-iterator", "hashlink 0.9.1", @@ -4340,16 +5232,29 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] @@ -4368,6 +5273,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework 3.5.1", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -4480,8 +5397,9 @@ checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" [[package]] name = "sea-query" -version = "0.32.0-rc.1" -source = "git+https://github.com/spiceai/sea-query.git?rev=213b6b876068f58159ebdd5852604a021afaebf9#213b6b876068f58159ebdd5852604a021afaebf9" +version = "0.32.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a5d1c518eaf5eda38e5773f902b26ab6d5e9e9e2bb2349ca6c64cf96f80448c" dependencies = [ "bigdecimal", "chrono", @@ -4493,14 +5411,16 @@ dependencies = [ [[package]] name = "sea-query-derive" -version = "0.4.1" -source = "git+https://github.com/spiceai/sea-query.git?rev=213b6b876068f58159ebdd5852604a021afaebf9#213b6b876068f58159ebdd5852604a021afaebf9" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bae0cbad6ab996955664982739354128c58d16e126114fe88c2a493642502aab" dependencies = [ + "darling 0.20.11", "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.107", - "thiserror 1.0.69", + "thiserror 2.0.17", ] [[package]] @@ -4509,6 +5429,17 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "sealed" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + [[package]] name = "secrecy" version = "0.10.3" @@ -4524,8 +5455,21 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.4", - "core-foundation", + "bitflags 2.10.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -4593,6 +5537,17 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + [[package]] name = "serde_json" version = "1.0.145" @@ -4743,6 +5698,15 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smol_str" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd538fb6910ac1099850255cf94a94df6551fbdd602454387d0adb2d1ca6dead" +dependencies = [ + "serde", +] + [[package]] name = "snafu" version = "0.8.9" @@ -4831,6 +5795,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stringprep" version = "0.1.5" @@ -4869,16 +5839,6 @@ dependencies = [ "syn 2.0.107", ] -[[package]] -name = "subprocess" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "subtle" version = "2.6.1" @@ -4933,8 +5893,8 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.4", - "core-foundation", + "bitflags 2.10.0", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -4971,6 +5931,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" + [[package]] name = "tempfile" version = "3.23.0" @@ -4980,7 +5946,7 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix", + "rustix 1.1.2", "windows-sys 0.61.2", ] @@ -5217,9 +6183,9 @@ dependencies = [ [[package]] name = "tokio-rusqlite" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2cc5f712424f089fc6549afe39773e9f8914ce170c45b546be24830b482b127" +checksum = "b65501378eb676f400c57991f42cbd0986827ab5c5200c53f206d710fb32a945" dependencies = [ "crossbeam-channel", "rusqlite", @@ -5311,7 +6277,8 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.5", + "rustls-native-certs", "rustls-pemfile", "socket2 0.5.10", "tokio", @@ -5321,6 +6288,7 @@ dependencies = [ "tower-layer", "tower-service", "tracing", + "webpki-roots 0.26.11", ] [[package]] @@ -5364,7 +6332,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "bytes", "futures-util", "http", @@ -5501,12 +6469,33 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tstr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" +dependencies = [ + "tstr_proc_macros", +] + +[[package]] +name = "tstr_proc_macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" + [[package]] name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typed-builder" version = "0.20.1" @@ -5533,6 +6522,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typewit" +version = "1.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -5572,6 +6567,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "untrusted" version = "0.9.0" @@ -5930,6 +6931,15 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -5975,6 +6985,21 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -6023,6 +7048,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6041,6 +7072,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" 
version = "0.48.5" @@ -6059,6 +7096,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6089,6 +7132,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6107,6 +7156,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6125,6 +7180,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6143,6 +7204,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6161,6 +7228,46 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winit" +version = "0.30.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66d4b9ed69c4009f6321f762d6e61ad8a2389cd431b97cb1e146812e9e6c732" +dependencies = [ + "android-activity", + "atomic-waker", + "bitflags 2.10.0", + "block2", + "calloop", + "cfg_aliases", + "concurrent-queue", + "core-foundation 0.9.4", + "core-graphics", + "cursor-icon", + "dpi", + "js-sys", + "libc", + "ndk", + "objc2", + "objc2-app-kit", + "objc2-foundation", + "objc2-ui-kit", + "orbclient", + "pin-project", + "raw-window-handle", + "redox_syscall 0.4.1", + "rustix 0.38.44", + "smol_str", + "tracing", + "unicode-segmentation", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "web-time", + "windows-sys 0.52.0", + "xkbcommon-dl", +] + [[package]] name = "winnow" version = "0.7.13" @@ -6188,9 +7295,9 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "wkt" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c3c2252781f8927974e8ba6a67c965a759a2b88ea2b1825f6862426bbb1c8f41" +checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d" dependencies = [ "geo-types", "log", @@ -6220,9 +7327,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.2", +] + +[[package]] +name = "xkbcommon-dl" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5" +dependencies = [ + "bitflags 2.10.0", + "dlib", + "log", + "once_cell", + "xkeysym", ] +[[package]] +name = "xkeysym" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index 860c4bb6..5c1f9729 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [workspace] -members = ["core"] +members = ["core", "python"] resolver = "2" [workspace.package] @@ -37,7 +37,7 @@ arrow-flight = { version = "55.0.2", features = [ arrow-ipc = { version = "55.0.2" } arrow-schema = { version = "55.0.2", features = ["serde"] } arrow-json = "55.0.2" -arrow-odbc = { version = "20.0.0" } +arrow-odbc = { version = "=20.0.0" } datafusion = { version = "49.0.2", default-features = false } datafusion-expr = { version = "49.0.2" } datafusion-federation = { version = "0.4.2" } @@ -46,18 +46,15 @@ datafusion-proto = { version = "49.0.2" } datafusion-physical-expr = { version = "49.0.2" } datafusion-physical-plan = { version = "49.0.2" } datafusion-table-providers = { path = "core" } -duckdb = { version = "=1.3.1", package = "spiceai_duckdb_fork" } # Forked to add support for duckdb_scan_arrow, pending: https://github.com/duckdb/duckdb-rs/pull/488 +duckdb = { git = "https://github.com/spiceai/duckdb-rs.git", rev = "a4b83432acfe1dfdd140e35d4603701ae76f6607" } # spiceai-1.3.2 with arrow 55 support [patch.crates-io] datafusion-federation = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "5ad2f52b9bafc6eaa50851f2e1fcf0585fb5184d" } # spiceai-49 -duckdb = { git = "https://github.com/spiceai/duckdb-rs.git", rev = "a4b83432acfe1dfdd140e35d4603701ae76f6607" } # spiceai-1.3.2 - datafusion = { git = "https://github.com/spiceai/datafusion.git", rev = "912eebec159e037c7c233aae35c090071675d5a9" } # spiceai-49 datafusion-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "912eebec159e037c7c233aae35c090071675d5a9" } # spiceai-49 datafusion-physical-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "912eebec159e037c7c233aae35c090071675d5a9" } # spiceai-49 datafusion-physical-plan = { git = "https://github.com/spiceai/datafusion.git", rev = "912eebec159e037c7c233aae35c090071675d5a9" } # spiceai-49 datafusion-proto = { git = "https://github.com/spiceai/datafusion.git", rev = "912eebec159e037c7c233aae35c090071675d5a9" } # spiceai-49 - arrow = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 arrow-array = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 arrow-buffer = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 @@ -67,4 +64,4 @@ arrow-json = { git = 
"https://github.com/spiceai/arrow-rs.git", rev = "53162ed30 arrow-ipc = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 arrow-ord = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 arrow-schema = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 -parquet = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 \ No newline at end of file +parquet = { git = "https://github.com/spiceai/arrow-rs.git", rev = "53162ed30fe6a2ed219b0af4dbbcd5d14745d7c2" } # spiceai-55.2 diff --git a/Makefile b/Makefile index fc993126..384b547a 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ all: .PHONY: test test: - cargo test --all-features + cargo test --features clickhouse-federation,duckdb-federation,flight,mysql-federation,postgres-federation,sqlite-federation -p datafusion-table-providers --lib .PHONY: lint lint: @@ -11,4 +11,4 @@ lint: .PHONY: test-integration test-integration: - RUST_LOG=debug cargo test --test integration --no-default-features --features postgres,sqlite,mysql,mongodb -- --nocapture + RUST_LOG=debug cargo test --test integration --no-default-features --features postgres,sqlite,mysql,flight,clickhouse,mongodb -- --nocapture diff --git a/README.md b/README.md index 7da1fb03..7fe2725d 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,30 @@ The goal of this repo is to extend the capabilities of DataFusion to support add Many of the table providers in this repo are for querying data from other database systems. Those providers also integrate with the [`datafusion-federation`](https://github.com/datafusion-contrib/datafusion-federation/) crate to allow for more efficient query execution, such as pushing down joins between multiple tables from the same database system, or efficiently implementing TopK style queries (`SELECT * FROM table ORDER BY foo LIMIT 10`). +To use these table providers with efficient federation push-down, add the `datafusion-federation` crate and create a DataFusion `SessionContext` using the Federation optimizer rule and query planner with: + +```rust +use datafusion::prelude::SessionContext; + +let state = datafusion_federation::default_session_state(); +let ctx = SessionContext::with_state(state); + +// Register the specific table providers into ctx +// queries will now automatically be federated +``` + ## Table Providers - PostgreSQL - MySQL - SQLite +- ClickHouse - DuckDB - Flight SQL - MongoDB +- ODBC -## Examples +## Examples (in Rust) Run the included examples to see how to use the table providers: @@ -33,6 +47,7 @@ cargo run --example duckdb_function --features duckdb ### SQLite ```bash +# Run from repo folder cargo run --example sqlite --features sqlite ``` @@ -68,7 +83,62 @@ EOF ``` ```bash -cargo run --example postgres --features postgres +# Run from repo folder +cargo run -p datafusion-table-providers --example postgres --features postgres + +``` + +### ClickHouse + +In order to run the Clickhouse example, you need to have a Clickhouse server running. You can use the following command to start a Clickhouse server in a Docker container the example can use: + +```bash +docker run --name clickhouse \ + -e CLICKHOUSE_DB=default \ + -e CLICKHOUSE_USER=admin \ + -e CLICKHOUSE_PASSWORD=secret \ + -p 8123:8123 \ + -p 9000:9000 \ + -d clickhouse/clickhouse-server:24.8-alpine + +# 2. 
Wait for readiness +echo "Waiting for ClickHouse to start..." +until curl -s http://localhost:8123/ping | grep -q 'Ok'; do + sleep 2 +done +echo + +# 3. Create tables and a parameterized view +docker exec -i clickhouse clickhouse-client \ + --user=admin --password=secret --multiquery < &str { + match self { + InsertMethod::Prepared => "insert_batch_prepared (NEW)", + InsertMethod::Inline => "insert_batch (OLD)", + } + } +} + +/// Benchmark for SQLite insert performance comparing prepared statements vs inline SQL +/// +/// This benchmark measures the performance of inserting data into SQLite +/// using both the new prepared statement approach and the old inline SQL approach. +/// +/// Set the environment variable SQLITE_INSERT_METHOD to control which method to test: +/// - "prepared" (default): Use prepared statements +/// - "inline": Use inline SQL generation +/// - "both": Test both methods and compare +#[tokio::main] +async fn main() { + println!("\n=== SQLite Insert Performance Benchmark ===\n"); + + // Determine which method(s) to test + let test_mode = std::env::var("SQLITE_INSERT_METHOD") + .unwrap_or_else(|_| "both".to_string()) + .to_lowercase(); + + let methods_to_test = match test_mode.as_str() { + "inline" => vec![InsertMethod::Inline], + "prepared" => vec![InsertMethod::Prepared], + "both" => vec![InsertMethod::Inline, InsertMethod::Prepared], + _ => vec![InsertMethod::Inline, InsertMethod::Prepared], + }; + + // Test configurations: (num_batches, rows_per_batch) + let test_configs = vec![ + (10, 1), + (10, 10), + (10, 100), + (1, 1000), + (10, 1000), + (100, 1000), + (10, 10000), + (5, 50000), + (5, 100000), + (5, 1000000), + ]; + + // Store results for comparison + type BenchmarkResults = Vec<(InsertMethod, Vec<(usize, f64, f64)>)>; + let mut results: BenchmarkResults = Vec::new(); + + for method in &methods_to_test { + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); + println!("Testing Method: {}", method.name()); + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"); + + let mut method_results = Vec::new(); + + for (num_batches, rows_per_batch) in &test_configs { + let total_rows = num_batches * rows_per_batch; + println!( + " Config: {} batches × {} rows = {} total rows", + num_batches, rows_per_batch, total_rows + ); + + let duration = run_benchmark(*num_batches, *rows_per_batch, *method).await; + let rows_per_sec = total_rows as f64 / duration.as_secs_f64(); + let time_per_row = duration.as_micros() as f64 / total_rows as f64; + + println!(" ⏱️ Time taken: {:.3}s", duration.as_secs_f64()); + println!(" 🚀 Throughput: {:.0} rows/sec", rows_per_sec); + println!(" 📊 Per-row time: {:.2}µs\n", time_per_row); + + method_results.push((total_rows, rows_per_sec, time_per_row)); + } + + results.push((*method, method_results)); + println!(); + } + + // Print comparison summary if both methods were tested + if methods_to_test.len() > 1 { + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); + println!("Performance Comparison"); + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"); + + println!( + "{:<15} {:<20} {:<20} {:<15}", + "Total Rows", "OLD (rows/sec)", "NEW (rows/sec)", "Speedup" + ); + println!("{}", "─".repeat(75)); + + for i in 0..test_configs.len() { + let (total_rows, old_throughput, _) = results[0].1[i]; + let (_, new_throughput, _) = results[1].1[i]; + let speedup = new_throughput / old_throughput; + + println!( + "{:<15} {:<20.0} {:<20.0} {:.2}x", + total_rows, old_throughput, new_throughput, speedup + ); 
+ } + + println!("\n{}", "─".repeat(75)); + + // Calculate average speedup + let avg_speedup: f64 = (0..test_configs.len()) + .map(|i| results[1].1[i].1 / results[0].1[i].1) + .sum::() + / test_configs.len() as f64; + + println!( + "\n📊 Average speedup: {:.2}x faster with prepared statements", + avg_speedup + ); + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"); + } +} + +async fn run_benchmark( + num_batches: usize, + rows_per_batch: usize, + method: InsertMethod, +) -> std::time::Duration { + // Create schema with multiple column types + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + Field::new("value", DataType::Float64, false), + Field::new("category", DataType::Utf8, true), + Field::new("count", DataType::Int64, true), + ])); + + let df_schema = ToDFSchema::to_dfschema_ref(Arc::clone(&schema)).expect("df schema"); + + // Create a unique table name to avoid conflicts + let table_name = format!( + "bench_table_{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() + ); + + let external_table = CreateExternalTable { + schema: df_schema, + name: TableReference::bare(table_name), + location: String::new(), + file_type: String::new(), + table_partition_cols: vec![], + if_not_exists: true, + definition: None, + order_exprs: vec![], + unbounded: false, + options: std::collections::HashMap::new(), + constraints: Constraints::new_unverified(vec![]), + column_defaults: std::collections::HashMap::default(), + temporary: false, + }; + + let ctx = SessionContext::new(); + + // Configure the factory based on which method we're testing + let use_prepared = match method { + InsertMethod::Prepared => true, + InsertMethod::Inline => false, + }; + + let table = SqliteTableProviderFactory::default() + .with_batch_insert_use_prepared_statements(use_prepared) + .create(&ctx.state(), &external_table) + .await + .expect("table should be created"); + + // Generate batches + let batches: Vec> = (0..num_batches) + .map(|batch_idx| { + let start_id = batch_idx * rows_per_batch; + + let ids: Vec = (start_id..(start_id + rows_per_batch)) + .map(|i| i as i64) + .collect(); + + let names: Vec = (start_id..(start_id + rows_per_batch)) + .map(|i| format!("name_{}", i)) + .collect(); + + let values: Vec = (start_id..(start_id + rows_per_batch)) + .map(|i| (i as f64) * 1.5) + .collect(); + + let categories: Vec> = (start_id..(start_id + rows_per_batch)) + .map(|i| Some(format!("category_{}", i % 10))) + .collect(); + + let counts: Vec> = (start_id..(start_id + rows_per_batch)) + .map(|i| { + if i % 3 == 0 { + Some((i % 100) as i64) + } else { + None + } + }) + .collect(); + + let id_array = Int64Array::from(ids); + let name_array = StringArray::from(names); + let value_array = Float64Array::from(values); + let category_array = StringArray::from(categories); + let count_array = Int64Array::from(counts); + + Ok(RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(id_array), + Arc::new(name_array), + Arc::new(value_array), + Arc::new(category_array), + Arc::new(count_array), + ], + ) + .expect("batch should be created")) + }) + .collect(); + + let exec = MockExec::new(batches, schema); + + // Start timing + let start = Instant::now(); + + let insertion = table + .insert_into(&ctx.state(), Arc::new(exec), InsertOp::Append) + .await + .expect("insertion should be successful"); + + collect(insertion, ctx.task_ctx()) + .await + .expect("insert successful"); + + // End 
timing + start.elapsed() +} diff --git a/core/Cargo.toml b/core/Cargo.toml index b902b4b9..200682aa 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -7,126 +7,173 @@ repository = "https://github.com/datafusion-contrib/datafusion-table-providers" license = "Apache-2.0" [dependencies] -arrow = { workspace = true, features = ["ffi"] } +arrow = { workspace = true } +arrow-ipc = { workspace = true, optional = true } arrow-array = { workspace = true, optional = true } -arrow-flight = { workspace = true, optional = true, features = ["flight-sql-experimental", "tls"] } +arrow-flight = { workspace = true, optional = true, features = [ + "flight-sql-experimental", + "tls", +] } arrow-schema = { workspace = true, optional = true, features = ["serde"] } arrow-json = { workspace = true } -async-stream = { version = "0.3.5", optional = true } -async-trait = "0.1.80" -num-bigint = "0.4.4" +arrow-odbc = { workspace = true, optional = true } +async-stream = { version = "0.3", optional = true } +async-trait = "0.1" base64 = { version = "0.22.1", optional = true } -bitflags = "2.9.1" +bb8 = { version = "0.9", optional = true } +bb8-postgres = { version = "0.9", optional = true } +bigdecimal = "0.4" +byteorder = "1.5.0" bytes = { version = "1.7.1", optional = true } byte-unit = { version = "5.1.4", optional = true } -bigdecimal = "0.4.5" -byteorder = "1.5.0" chrono = "0.4" -datafusion = { workspace = true } +clickhouse = { version = "0.13.3", optional = true } +dashmap = "6.1.0" +datafusion = { workspace = true, default-features = false } datafusion-expr = { workspace = true, optional = true } +datafusion-federation = { workspace = true, features = [ + "sql", +], optional = true } datafusion-physical-expr = { workspace = true, optional = true } datafusion-physical-plan = { workspace = true, optional = true } datafusion-proto = { workspace = true, optional = true } -duckdb = { version = "1.3.2", features = [ +duckdb = { workspace = true, features = [ "bundled", "r2d2", "vtab", "vtab-arrow", "appender-arrow", ], optional = true } +libduckdb-sys = { version = "=1.3.0", optional = true } +dyn-clone = { version = "1.0", optional = true } fallible-iterator = "0.3.0" -futures = "0.3.30" -mysql_async = { version = "0.35.1", features = ["native-tls-tls", "chrono", "hdrhistogram", "bigdecimal", "time"], optional = true } +fundu = "2.0.1" +futures = "0.3" +geo-types = "0.7" +itertools = "0.14.0" +mysql_async = { version = "0.36", features = [ + "native-tls-tls", + "chrono", + "time", + "bigdecimal", +], optional = true } mongodb = { version = "3.2.2", features = ["openssl-tls"], optional = true } +native-tls = { version = "0.2", optional = true } +num-bigint = "0.4" num-traits = { version = "0.2", optional = true } -prost = { version = "0.13.2", optional = true } -rand = "0.8.5" -r2d2 = { version = "0.8.10", optional = true } -rust_decimal = { version = "1.38", optional = true, features = ["db-postgres"] } -rusqlite = { version = "0.31.0", optional = true } -sea-query = { git = "https://github.com/spiceai/sea-query.git", rev = "213b6b876068f58159ebdd5852604a021afaebf9", features = ["backend-sqlite", "backend-postgres", "postgres-array", "with-rust_decimal", "with-bigdecimal", "with-time", "with-chrono"] } -secrecy = "0.10.3" -serde = { version = "1.0.209", features = ["derive"] } -serde_json = "1.0.124" -snafu = "0.8.3" -time = "0.3.36" -tokio = { version = "1.38.0", features = ["macros", "fs"] } -tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-uuid-1", "with-serde_json-1", 
"with-geo-types-0_7"], optional = true } -tracing = "0.1.40" -uuid = { version = "1.9.1", optional = true } +odbc-api = { version = "19.0", optional = true } +pem = { version = "3.0.4", optional = true } postgres-native-tls = { version = "0.5.0", optional = true } -bb8 = { version = "0.8", optional = true } -bb8-postgres = { version = "0.8", optional = true } -native-tls = { version = "0.2.11", optional = true } +prost = { version = "0.14.1", optional = true } +rand = { version = "0.9" } +regex = { version = "1" } +r2d2 = { version = "0.8", optional = true } +rusqlite = { version = "0.32", optional = true } +sea-query = { version = "0.32", features = [ + "backend-sqlite", + "backend-postgres", + "postgres-array", + "with-rust_decimal", + "with-bigdecimal", + "with-time", + "with-chrono", +] } +secrecy = "0.10.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10" +snafu = "0.8" +time = "0.3" +tokio = { version = "1.46", features = ["macros", "fs"] } +tokio-postgres = { version = "0.7", features = [ + "with-chrono-0_4", + "with-uuid-1", + "with-serde_json-1", + "with-geo-types-0_7", +], optional = true } +tokio-rusqlite = { version = "0.6.0", optional = true } +tonic = { version = "0.12", optional = true, features = [ + "tls-native-roots", + "tls-webpki-roots", +] } +tracing = "0.1" trust-dns-resolver = "0.23.2" -url = "2.5.1" -pem = { version = "3.0.4", optional = true } -tokio-rusqlite = { version = "0.5.1", optional = true } -tonic = { version = "0.12.2", optional = true } -datafusion-federation = { workspace = true, features = ["sql"] } -itertools = "0.13.0" -dyn-clone = { version = "1.0.17", optional = true } -geo-types = "0.7.13" -fundu = "2.0.1" +url = "2.5.4" +uuid = { version = "1.18", optional = true } +rust_decimal = { version = "1.38.0", features = ["db-postgres"] } [dev-dependencies] -anyhow = "1.0.86" -bollard = "0.16.1" -rand = "0.8.5" -reqwest = "0.12.5" -secrecy = "0.10.3" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -test-log = { version = "0.2.16", features = ["trace"] } -rstest = "0.22.0" -geozero = { version = "0.13.0", features = ["with-wkb"] } -tokio-stream = { version = "0.1.15", features = ["net"] } -insta = { version = "1.40.0", features = ["filters"] } -datafusion-physical-plan = { workspace = true } -tempfile = "3.8.1" +anyhow = "1.0" +bollard = "0.19" +geozero = { version = "0.14.0", features = ["with-wkb"] } +insta = { version = "1.43.2", features = ["filters"] } +prost = { version = "=0.13.5" } +rand = "0.9" +reqwest = "0.12" +rstest = "0.26.1" +test-log = { version = "0.2", features = ["trace"] } +tokio-stream = { version = "0.1", features = ["net"] } +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tempfile = "3.20.0" [features] -mysql = ["dep:mysql_async", "dep:async-stream"] -postgres = ["dep:tokio-postgres", "dep:rust_decimal", "dep:uuid", "dep:postgres-native-tls", "dep:bb8", "dep:bb8-postgres", "dep:native-tls", "dep:pem", "dep:async-stream", "dep:arrow-schema"] -sqlite = ["dep:rusqlite", "dep:tokio-rusqlite", "dep:arrow-schema"] -duckdb = ["dep:duckdb", "dep:r2d2", "dep:uuid", "dep:dyn-clone", "dep:async-stream", "dep:arrow-schema", "dep:byte-unit"] +clickhouse = ["dep:clickhouse", "arrow-ipc/lz4", "dep:async-stream"] +clickhouse-federation = ["clickhouse", "federation"] +duckdb = [ + "dep:duckdb", + "dep:r2d2", + "dep:uuid", + "dep:dyn-clone", + "dep:async-stream", + "dep:arrow-schema", + "dep:byte-unit", +] +duckdb-federation = ["duckdb", "federation"] +federation = 
["dep:datafusion-federation"] flight = [ - "dep:arrow-array", "dep:arrow-flight", - "dep:arrow-schema", - "dep:base64", - "dep:bytes", - "dep:datafusion-expr", - "dep:datafusion-physical-expr", - "dep:datafusion-physical-plan", + "datafusion/serde", "dep:datafusion-proto", "dep:prost", "dep:tonic", ] -duckdb-federation = ["duckdb"] -sqlite-federation = ["sqlite"] -postgres-federation = ["postgres"] mongodb = [ "dep:mongodb", "dep:async-stream", "dep:arrow-schema", - "dep:rust_decimal", "dep:num-traits", ] +mysql = ["dep:mysql_async", "dep:async-stream"] +mysql-federation = ["mysql", "federation"] +odbc = ["dep:odbc-api", "dep:arrow-odbc", "dep:async-stream", "dep:dyn-clone"] +odbc-federation = ["odbc", "federation"] +postgres = [ + "dep:tokio-postgres", + "dep:uuid", + "dep:postgres-native-tls", + "dep:bb8", + "dep:bb8-postgres", + "dep:native-tls", + "dep:pem", + "dep:async-stream", + "dep:arrow-schema", +] +postgres-federation = ["postgres", "federation"] +sqlite = ["dep:rusqlite", "dep:tokio-rusqlite", "dep:arrow-schema"] +sqlite-federation = ["sqlite", "federation"] +sqlite-bundled = ["sqlite", "rusqlite/bundled"] -[[example]] -name = "duckdb" -path = "examples/duckdb.rs" -required-features = ["duckdb"] - -[[example]] -name = "duckdb_external_table" -path = "examples/duckdb_external_table.rs" -required-features = ["duckdb"] +# docs.rs-specific configuration +[package.metadata.docs.rs] +# document all features +all-features = true +# defines the configuration attribute `docsrs` +rustdoc-args = ["--cfg", "docsrs"] [[example]] -name = "duckdb_function" -path = "examples/duckdb_function.rs" -required-features = ["duckdb"] +name = "odbc_sqlite" +path = "examples/odbc_sqlite.rs" +required-features = ["sqlite", "odbc"] [[example]] name = "flight-sql" @@ -139,22 +186,16 @@ path = "examples/sqlite.rs" required-features = ["sqlite"] [[example]] -name = "mysql" -path = "examples/mysql.rs" -required-features = ["mysql"] - -[[example]] -name = "postgres" -path = "examples/postgres.rs" -required-features = ["postgres"] +name = "clickhouse" +path = "examples/clickhouse.rs" +required-features = ["clickhouse"] [[example]] name = "mongodb" path = "examples/mongodb.rs" required-features = ["mongodb"] -[[bench]] +[[bin]] name = "sqlite_insert_benchmark" -path = "benches/sqlite_insert_benchmark.rs" -harness = false +path = "../benches/sqlite_insert_benchmark.rs" required-features = ["sqlite"] diff --git a/core/examples/clickhouse.rs b/core/examples/clickhouse.rs new file mode 100644 index 00000000..676dcdef --- /dev/null +++ b/core/examples/clickhouse.rs @@ -0,0 +1,70 @@ +use std::collections::HashMap; + +use datafusion::prelude::SessionContext; +use datafusion::sql::TableReference; +use datafusion_table_providers::{ + clickhouse::{Arg, ClickHouseTableFactory}, + sql::db_connection_pool::clickhousepool::ClickHouseConnectionPool, + util::secrets::to_secret_map, +}; + +/// Example illustrates on how to use clickhouse client as a table factory +/// and create read only table providers which can be registered with datafusion session. +#[tokio::main] +async fn main() { + let param = to_secret_map(HashMap::from([ + ("url".to_string(), "http://localhost:8123".to_string()), + ("user".to_string(), "admin".to_string()), + ("password".to_string(), "secret".to_string()), + ])); + + let pool = ClickHouseConnectionPool::new(param).await.unwrap(); + + // Create a Datafusion session. 
+ let ctx = SessionContext::new(); + + // Create a ClickHouse table factory + let table_factory = ClickHouseTableFactory::new(pool); + + // Using table factory, we can create table provider that queries a clickhouse table + let base_table = table_factory + .table_provider(TableReference::bare("Reports"), None) + .await + .unwrap(); + + // Demonstrate direct table provider registration + // This method registers the table in the default catalog + // Here we register the ClickHouse "Reports" table as "reports_v1" + ctx.register_table("reports_v1", base_table).unwrap(); + + // Using table factory, we can create table provider that queries a parameterized view in clickhouse with some arguments. + let view_table = table_factory + .table_provider( + TableReference::bare("Users"), + Some(vec![( + "workspace_uid".to_string(), + Arg::String("abc".to_string()), + )]), + ) + .await + .unwrap(); + + // Demonstrate direct table provider registration + // This method registers the table in the default catalog + // Here we register the "Users('abc')" view as "users" + ctx.register_table("users", view_table).unwrap(); + + let df = ctx + .sql("SELECT * FROM datafusion.public.reports_v1") + .await + .expect("select failed"); + + df.show().await.expect("show failed"); + + let df = ctx + .sql("SELECT * FROM datafusion.public.users") + .await + .expect("select failed"); + + df.show().await.expect("show failed"); +} diff --git a/core/examples/duckdb.rs b/core/examples/duckdb.rs index 25860cdd..898c9efb 100644 --- a/core/examples/duckdb.rs +++ b/core/examples/duckdb.rs @@ -1,53 +1,71 @@ use std::sync::Arc; -use datafusion::{prelude::SessionContext, sql::TableReference}; +use datafusion::prelude::SessionContext; +use datafusion::sql::TableReference; use datafusion_table_providers::{ - duckdb::DuckDBTableFactory, sql::db_connection_pool::duckdbpool::DuckDbConnectionPool, + common::DatabaseCatalogProvider, duckdb::DuckDBTableFactory, + sql::db_connection_pool::duckdbpool::DuckDbConnectionPool, }; use duckdb::AccessMode; -/// This example demonstrates how to create a DuckDBTableFactory and use it to create TableProviders -/// that can be registered with DataFusion. +/// This example demonstrates how to: +/// 1. Create a DuckDB connection pool +/// 2. Create and use DuckDBTableFactory to generate TableProvider +/// 3. Register TableProvider with DataFusion +/// 4. Use SQL queries to access DuckDB table data #[tokio::main] async fn main() { - // Opening in ReadOnly mode allows multiple reader processes to access the database at the same time. 
+ // Create DuckDB connection pool + // Opening in ReadOnly mode allows multiple reader processes to access + // the database at the same time let duckdb_pool = Arc::new( - DuckDbConnectionPool::new_file("examples/duckdb_example.db", &AccessMode::ReadOnly) + DuckDbConnectionPool::new_file("core/examples/duckdb_example.db", &AccessMode::ReadOnly) .expect("unable to create DuckDB connection pool"), ); - let duckdb_table_factory = DuckDBTableFactory::new(duckdb_pool); + // Create DuckDB table provider factory + // Used to generate TableProvider instances that can read DuckDB table data + let table_factory = DuckDBTableFactory::new(duckdb_pool.clone()); - let companies_table = duckdb_table_factory - .table_provider(TableReference::bare("companies")) - .await - .expect("to create table provider"); - - let projects_table = duckdb_table_factory - .table_provider(TableReference::bare("projects")) - .await - .expect("to create table provider"); + // Create database catalog provider + // This allows us to access tables through catalog structure (catalog.schema.table) + let catalog = DatabaseCatalogProvider::try_new(duckdb_pool).await.unwrap(); + // Create DataFusion session context let ctx = SessionContext::new(); + // Register DuckDB catalog, making it accessible via the "duckdb" name + ctx.register_catalog("duckdb", Arc::new(catalog)); - // It's not required that the name registed in DataFusion matches the table name in DuckDB. - ctx.register_table("companies", companies_table) - .expect("to register table"); - - ctx.register_table("projects", projects_table) - .expect("to register table"); + // Demonstrate direct table provider registration + // This method registers the table in the default catalog + // Here we register the DuckDB "companies" table as "companies_v2" + ctx.register_table( + "companies_v2", + table_factory + .table_provider(TableReference::bare("companies")) + .await + .expect("failed to register table provider"), + ) + .expect("failed to register table"); + // Query Example 1: Query the renamed table through default catalog let df = ctx - .sql("SELECT * FROM companies") + .sql("SELECT * FROM datafusion.public.companies_v2") .await .expect("select failed"); - df.show().await.expect("show failed"); + // Query Example 2: Query the original table through DuckDB catalog let df = ctx - .sql("SELECT * FROM projects") + .sql("SELECT * FROM duckdb.main.companies") .await .expect("select failed"); + df.show().await.expect("show failed"); + // Query Example 3: Query the projects table in DuckDB + let df = ctx + .sql("SELECT * FROM duckdb.main.projects") + .await + .expect("select failed"); df.show().await.expect("show failed"); } diff --git a/core/examples/duckdb_example.db b/core/examples/duckdb_example.db new file mode 100644 index 00000000..ec7a2956 Binary files /dev/null and b/core/examples/duckdb_example.db differ diff --git a/core/examples/flight-sql.rs b/core/examples/flight-sql.rs index 0f49f8b0..6e8682dc 100644 --- a/core/examples/flight-sql.rs +++ b/core/examples/flight-sql.rs @@ -29,7 +29,7 @@ use std::sync::Arc; #[tokio::main] async fn main() -> datafusion::common::Result<()> { let ctx = SessionContext::new(); - let flight_sql = FlightTableFactory::new(Arc::new(FlightSqlDriver::default())); + let flight_sql = FlightTableFactory::new(Arc::new(FlightSqlDriver::new())); let table = flight_sql .open_table( "http://localhost:32010", diff --git a/core/examples/mysql.rs b/core/examples/mysql.rs index 34d68e66..4f562481 100644 --- a/core/examples/mysql.rs +++ b/core/examples/mysql.rs @@ 
-1,26 +1,30 @@ use std::{collections::HashMap, sync::Arc}; -use datafusion::{prelude::SessionContext, sql::TableReference}; +use datafusion::prelude::SessionContext; +use datafusion::sql::TableReference; use datafusion_table_providers::{ - mysql::MySQLTableFactory, sql::db_connection_pool::mysqlpool::MySQLConnectionPool, - util::secrets::to_secret_map, + common::DatabaseCatalogProvider, mysql::MySQLTableFactory, + sql::db_connection_pool::mysqlpool::MySQLConnectionPool, util::secrets::to_secret_map, }; -/// This example demonstrates how to register a table provider into DataFusion that -/// uses a MySQL table as its source. -/// -/// Use docker to start a MySQL server this example can connect to: +/// This example demonstrates how to: +/// 1. Create a MySQL connection pool +/// 2. Create and use MySQLTableFactory to generate TableProvider +/// 3. Register TableProvider with DataFusion +/// 4. Use SQL queries to access MySQL table data /// +/// Prerequisites: +/// Start a MySQL server using Docker: /// ```bash /// docker run --name mysql -e MYSQL_ROOT_PASSWORD=password -e MYSQL_DATABASE=mysql_db -p 3306:3306 -d mysql:9.0 /// # Wait for the MySQL server to start /// sleep 30 /// -/// # Create a table in the MySQL server and insert some data +/// # Create a table and insert sample data /// docker exec -i mysql mysql -uroot -ppassword mysql_db <, +} + +impl ClickHouseTableFactory { + pub fn new(pool: impl Into>) -> Self { + Self { pool: pool.into() } + } + + pub async fn table_provider( + &self, + table_reference: TableReference, + args: Option>, + ) -> Result, Box> + { + let client: &dyn AsyncDbConnection = &self.pool.client(); + let schema = client.get_schema(&table_reference).await?; + let table_provider = Arc::new(ClickHouseTable::new( + table_reference, + args, + self.pool.clone(), + schema, + Constraints::default(), + )); + + #[cfg(feature = "clickhouse-federation")] + let table_provider = Arc::new( + table_provider + .create_federated_table_provider() + .map_err(|e| Box::new(e) as Box)?, + ); + + Ok(table_provider) + } +} + +#[derive(Debug, Clone)] +pub enum Arg { + Unsigned(u64), + Signed(i64), + String(String), +} + +impl From for Arg { + fn from(value: String) -> Self { + Self::String(value) + } +} + +impl From for Arg { + fn from(value: u64) -> Self { + Self::Unsigned(value) + } +} + +impl From for Arg { + fn from(value: i64) -> Self { + Self::Signed(value) + } +} + +impl From for Expr { + fn from(value: Arg) -> Self { + Expr::value(match value { + Arg::Unsigned(x) => Value::Number(x.to_string(), false), + Arg::Signed(x) => Value::Number(x.to_string(), false), + Arg::String(x) => Value::SingleQuotedString(x), + }) + } +} + +fn into_table_args(args: Vec<(String, Arg)>) -> Vec { + args.into_iter() + .map(|(name, value)| FunctionArg::Named { + name: Ident::new(name), + arg: FunctionArgExpr::Expr(value.into()), + operator: FunctionArgOperator::Equals, + }) + .collect() +} + +pub struct ClickHouseTable { + table_reference: TableReference, + args: Option>, + pool: Arc, + schema: SchemaRef, + constraints: Constraints, + dialect: Arc, +} + +impl std::fmt::Debug for ClickHouseTable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ClickHouseTable") + .field("table_name", &self.table_reference) + .field("schema", &self.schema) + .field("constraints", &self.constraints) + .finish() + } +} + +impl ClickHouseTable { + pub fn new( + table_reference: TableReference, + args: Option>, + pool: Arc, + schema: SchemaRef, + constraints: Constraints, + ) -> Self { 
+ Self { + table_reference, + args, + pool, + schema, + constraints, + dialect: Arc::new(unparser::dialect::DefaultDialect {}), + } + } +} diff --git a/core/src/clickhouse/federation.rs b/core/src/clickhouse/federation.rs new file mode 100644 index 00000000..c18690af --- /dev/null +++ b/core/src/clickhouse/federation.rs @@ -0,0 +1,96 @@ +use crate::sql::db_connection_pool::dbconnection::{get_schema, Error as DbError}; +use crate::sql::db_connection_pool::{DbConnectionPool, JoinPushDown}; +use crate::sql::sql_provider_datafusion::{get_stream, to_execution_error}; +use arrow::datatypes::SchemaRef; +use async_trait::async_trait; +use datafusion::sql::unparser::dialect::Dialect; +use datafusion_federation::sql::{ + ast_analyzer::AstAnalyzer, RemoteTableRef, SQLExecutor, SQLFederationProvider, SQLTableSource, +}; +use datafusion_federation::{FederatedTableProviderAdaptor, FederatedTableSource}; +use futures::TryStreamExt; +use snafu::ResultExt; +use std::sync::Arc; + +use datafusion::{ + error::{DataFusionError, Result as DataFusionResult}, + execution::SendableRecordBatchStream, + physical_plan::stream::RecordBatchStreamAdapter, + sql::TableReference, +}; + +use super::ClickHouseTable; + +impl ClickHouseTable { + fn create_federated_table_source( + self: Arc, + ) -> DataFusionResult> { + let table_reference = self.table_reference.clone(); + let schema = self.schema.clone(); + let fed_provider = Arc::new(SQLFederationProvider::new(self)); + Ok(Arc::new(SQLTableSource::new_with_schema( + fed_provider, + RemoteTableRef::from(table_reference), + schema, + ))) + } + + pub fn create_federated_table_provider( + self: Arc, + ) -> DataFusionResult { + let table_source = Self::create_federated_table_source(Arc::clone(&self))?; + Ok(FederatedTableProviderAdaptor::new_with_provider( + table_source, + self, + )) + } +} + +#[async_trait] +impl SQLExecutor for ClickHouseTable { + fn name(&self) -> &str { + "clickhouse" + } + + fn compute_context(&self) -> Option { + match self.pool.join_push_down() { + JoinPushDown::Disallow => Some(format!("{}", std::ptr::from_ref(self) as usize)), + JoinPushDown::AllowedFor(s) => Some(s), + } + } + + fn dialect(&self) -> Arc { + self.dialect.clone() + } + + fn ast_analyzer(&self) -> Option { + None + } + + fn execute( + &self, + query: &str, + schema: SchemaRef, + ) -> DataFusionResult { + let fut = get_stream(self.pool.clone(), query.to_string(), Arc::clone(&schema)); + let stream = futures::stream::once(fut).try_flatten(); + Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) + } + + async fn table_names(&self) -> DataFusionResult> { + Err(DataFusionError::NotImplemented( + "table inference not implemented".to_string(), + )) + } + + async fn get_table_schema(&self, table_name: &str) -> DataFusionResult { + get_schema( + self.pool.connect().await?, + &TableReference::from(table_name), + ) + .await + .boxed() + .map_err(|e| DbError::UnableToGetSchema { source: e }) + .map_err(to_execution_error) + } +} diff --git a/core/src/clickhouse/sql_table.rs b/core/src/clickhouse/sql_table.rs new file mode 100644 index 00000000..2d33035e --- /dev/null +++ b/core/src/clickhouse/sql_table.rs @@ -0,0 +1,102 @@ +use async_trait::async_trait; +use datafusion::catalog::Session; +use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder, LogicalTableSource}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::sql::sqlparser::ast::VisitMut; +use datafusion::sql::unparser::Unparser; +use std::fmt::Display; +use std::sync::Arc; +use std::{any::Any, fmt}; + +use 
datafusion::{ + arrow::datatypes::SchemaRef, + datasource::TableProvider, + error::Result as DataFusionResult, + logical_expr::{Expr, TableProviderFilterPushDown, TableType}, +}; + +use crate::sql::sql_provider_datafusion::{default_filter_pushdown, SqlExec}; +use crate::util::table_arg_replace::TableArgReplace; + +use super::{into_table_args, ClickHouseTable}; + +impl ClickHouseTable { + fn create_logical_plan( + &self, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> DataFusionResult { + let table_source = LogicalTableSource::new(self.schema.clone()); + LogicalPlanBuilder::scan_with_filters( + self.table_reference.clone(), + Arc::new(table_source), + projection.cloned(), + filters.to_vec(), + )? + .limit(0, limit)? + .build() + } + + fn create_physical_plan( + &self, + projection: Option<&Vec>, + sql: String, + ) -> DataFusionResult> { + Ok(Arc::new(SqlExec::new( + projection, + &self.schema(), + self.pool.clone(), + sql, + )?)) + } +} + +#[async_trait] +impl TableProvider for ClickHouseTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> DataFusionResult> { + let filter_push_down = default_filter_pushdown(filters, &*self.dialect); + Ok(filter_push_down) + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> DataFusionResult> { + let logical_plan = self.create_logical_plan(projection, filters, limit)?; + let mut sql = Unparser::new(&*self.dialect).plan_to_sql(&logical_plan)?; + + if let Some(args) = self.args.clone() { + let args = into_table_args(args); + let mut table_args = TableArgReplace::new(vec![(self.table_reference.clone(), args)]); + let _ = sql.visit(&mut table_args); + } + + let sql = sql.to_string(); + return self.create_physical_plan(projection, sql); + } +} + +impl Display for ClickHouseTable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ClickHouseTable {}", self.table_reference) + } +} diff --git a/core/src/common.rs b/core/src/common.rs new file mode 100644 index 00000000..14c06f84 --- /dev/null +++ b/core/src/common.rs @@ -0,0 +1,103 @@ +use std::{any::Any, sync::Arc}; + +use crate::sql::db_connection_pool::dbconnection::{get_schemas, get_tables}; +use crate::sql::db_connection_pool::DbConnectionPool; +use crate::sql::sql_provider_datafusion::SqlTable; +use async_trait::async_trait; +use dashmap::DashMap; +use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::{ + catalog::{CatalogProvider, SchemaProvider, TableProvider}, + sql::TableReference, +}; + +type Result = std::result::Result>; +type Pool = Arc + Send + Sync>; + +#[derive(Debug)] +pub struct DatabaseCatalogProvider { + schemas: DashMap>, +} + +impl DatabaseCatalogProvider { + pub async fn try_new(pool: Pool) -> Result { + let conn = pool.connect().await?; + + let schemas = get_schemas(conn).await?; + let schema_map = DashMap::new(); + + for schema in schemas { + let provider = DatabaseSchemaProvider::try_new(schema.clone(), pool.clone()).await?; + schema_map.insert(schema, Arc::new(provider) as Arc); + } + + Ok(Self { + schemas: schema_map, + }) + } +} + +impl CatalogProvider for DatabaseCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + self.schemas.iter().map(|s| s.key().clone()).collect() + } + + fn schema(&self, 
name: &str) -> Option> { + self.schemas.get(name).map(|s| s.clone()) + } +} + +pub struct DatabaseSchemaProvider { + name: String, + tables: Vec, + pool: Pool, +} + +impl std::fmt::Debug for DatabaseSchemaProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "DatabaseSchemaProvider {{ name: {:?} }}", self.name) + } +} + +impl DatabaseSchemaProvider { + pub async fn try_new(name: String, pool: Pool) -> Result { + let conn = pool.connect().await?; + let tables = get_tables(conn, &name).await?; + + Ok(Self { name, tables, pool }) + } +} + +#[async_trait] +impl SchemaProvider for DatabaseSchemaProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + self.tables.clone() + } + + async fn table(&self, table: &str) -> DataFusionResult>> { + if self.table_exist(table) { + SqlTable::new( + &self.name, + &self.pool, + TableReference::partial(self.name.clone(), table.to_string()), + ) + .await + .map(|v| Some(Arc::new(v) as Arc)) + .map_err(|e| DataFusionError::External(Box::new(e))) + } else { + Ok(None) + } + } + + fn table_exist(&self, name: &str) -> bool { + self.tables.contains(&name.to_string()) + } +} diff --git a/core/src/duckdb.rs b/core/src/duckdb.rs index eb7720ea..f74a7135 100644 --- a/core/src/duckdb.rs +++ b/core/src/duckdb.rs @@ -438,17 +438,16 @@ impl TableProviderFactory for DuckDBTableProviderFactory { let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into()); - let table_definition = Arc::new( + let table_definition = TableDefinition::new(RelationName::new(name.clone()), Arc::clone(&schema)) .with_constraints(cmd.constraints.clone()) - .with_indexes(indexes.clone()), - ); + .with_indexes(indexes.clone()); let pool = Arc::new(pool); - make_initial_table(Arc::clone(&table_definition), &pool)?; + make_initial_table(Arc::new(table_definition.clone()), &pool)?; let table_writer_builder = DuckDBTableWriterBuilder::new() - .with_table_definition(Arc::clone(&table_definition)) + .with_table_definition(table_definition) .with_pool(pool) .set_on_conflict(on_conflict); @@ -658,7 +657,7 @@ impl DuckDBTableFactory { let table_writer_builder = DuckDBTableWriterBuilder::new() .with_read_provider(read_provider) .with_pool(Arc::clone(&self.pool)) - .with_table_definition(Arc::new(table_definition)); + .with_table_definition(table_definition); Ok(Arc::new(table_writer_builder.build()?)) } @@ -765,7 +764,7 @@ pub(crate) mod tests { order_exprs: vec![], unbounded: false, options, - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -826,7 +825,7 @@ pub(crate) mod tests { order_exprs: vec![], unbounded: false, options, - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -883,7 +882,7 @@ pub(crate) mod tests { order_exprs: vec![], unbounded: false, options, - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -938,7 +937,7 @@ pub(crate) mod tests { order_exprs: vec![], unbounded: false, options, - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -996,7 +995,7 @@ pub(crate) mod tests { order_exprs: vec![], unbounded: false, options, - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: 
HashMap::new(), temporary: false, }; diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index 4a3eb058..5dd47a2a 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -764,8 +764,8 @@ pub(crate) mod tests { dbconnection::duckdbconn::DuckDbConnection, duckdbpool::DuckDbConnectionPool, }, }; - use arrow::array::RecordBatch; use datafusion::{ + arrow::array::RecordBatch, common::SchemaExt, datasource::sink::DataSink, execution::{SendableRecordBatchStream, TaskContext}, @@ -803,7 +803,7 @@ pub(crate) mod tests { .expect("to build parquet reader"); parquet_reader - .collect::, arrow::error::ArrowError>>() + .collect::, datafusion::arrow::error::ArrowError>>() .expect("to get records") } diff --git a/core/src/duckdb/federation.rs b/core/src/duckdb/federation.rs index 80f71558..8e31298a 100644 --- a/core/src/duckdb/federation.rs +++ b/core/src/duckdb/federation.rs @@ -1,6 +1,6 @@ use crate::sql::db_connection_pool::dbconnection::{get_schema, Error as DbError}; use crate::sql::sql_provider_datafusion::{get_stream, to_execution_error}; -use arrow::datatypes::SchemaRef; +use datafusion::arrow::datatypes::SchemaRef; use datafusion::sql::unparser::dialect::Dialect; use datafusion_federation::sql::{ RemoteTableRef, SQLExecutor, SQLFederationProvider, SQLTableSource, @@ -25,12 +25,12 @@ impl DuckDBTable { fn create_federated_table_source( self: Arc, ) -> DataFusionResult> { - let table_name = self.base_table.table_reference.clone(); + let table_reference = self.base_table.table_reference.clone(); let schema = Arc::clone(&Arc::clone(&self).base_table.schema()); let fed_provider = Arc::new(SQLFederationProvider::new(self)); Ok(Arc::new(SQLTableSource::new_with_schema( fed_provider, - RemoteTableRef::from(table_name), + RemoteTableRef::from(table_reference), schema, ))) } diff --git a/core/src/duckdb/sql_table.rs b/core/src/duckdb/sql_table.rs index a5a0341c..bcbd68a1 100644 --- a/core/src/duckdb/sql_table.rs +++ b/core/src/duckdb/sql_table.rs @@ -1,8 +1,6 @@ use crate::sql::db_connection_pool::DbConnectionPool; -use crate::sql::sql_provider_datafusion::expr::Engine; use async_trait::async_trait; use datafusion::catalog::Session; -use datafusion::common::Constraints; use datafusion::sql::unparser::dialect::Dialect; use futures::TryStreamExt; use std::collections::HashMap; @@ -30,6 +28,9 @@ pub struct DuckDBTable { /// A mapping of table/view names to `DuckDB` functions that can instantiate a table (e.g. "`read_parquet`('`my_file.parquet`')"). pub(crate) table_functions: Option>, + + /// Constraints on the table. 
+ pub(crate) constraints: Option, } impl std::fmt::Debug for DuckDBTable { @@ -47,21 +48,15 @@ impl DuckDBTable { table_reference: impl Into, table_functions: Option>, dialect: Option>, - constraints: Option, + constraints: Option, ) -> Self { - let base_table = SqlTable::new_with_schema( - "duckdb", - pool, - schema, - table_reference, - Some(Engine::DuckDB), - ) - .with_dialect(dialect.unwrap_or(Arc::new(DuckDBDialect::new()))) - .with_constraints_opt(constraints); + let base_table = SqlTable::new_with_schema("duckdb", pool, schema, table_reference) + .with_dialect(dialect.unwrap_or(Arc::new(DuckDBDialect::new()))); Self { base_table, table_functions, + constraints, } } @@ -72,14 +67,12 @@ impl DuckDBTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { + let sql = self.base_table.scan_to_sql(projections, filters, limit)?; Ok(Arc::new(DuckSqlExec::new( projections, schema, - &self.base_table.table_reference, self.base_table.clone_pool(), - filters, - limit, - self.table_functions.clone(), + sql, )?)) } } @@ -94,10 +87,6 @@ impl TableProvider for DuckDBTable { self.base_table.schema() } - fn constraints(&self) -> Option<&Constraints> { - self.base_table.constraints() - } - fn table_type(&self) -> TableType { self.base_table.table_type() } @@ -109,6 +98,10 @@ impl TableProvider for DuckDBTable { self.base_table.supports_filters_pushdown(filters) } + fn constraints(&self) -> Option<&datafusion::common::Constraints> { + self.constraints.as_ref() + } + async fn scan( &self, _state: &dyn Session, @@ -136,25 +129,14 @@ impl DuckSqlExec { fn new( projections: Option<&Vec>, schema: &SchemaRef, - table_reference: &TableReference, pool: Arc + Send + Sync>, - filters: &[Expr], - limit: Option, - table_functions: Option>, + sql: String, ) -> DataFusionResult { - let base_exec = SqlExec::new( - projections, - schema, - table_reference, - pool, - filters, - limit, - Some(Engine::DuckDB), - )?; + let base_exec = SqlExec::new(projections, schema, pool, sql)?; Ok(Self { base_exec, - table_functions, + table_functions: None, }) } diff --git a/core/src/duckdb/write.rs b/core/src/duckdb/write.rs index 6c7b7ea8..e815aa0c 100644 --- a/core/src/duckdb/write.rs +++ b/core/src/duckdb/write.rs @@ -3,7 +3,6 @@ use std::{any::Any, fmt, sync::Arc}; use crate::duckdb::DuckDB; use crate::sql::db_connection_pool::duckdbpool::DuckDbConnectionPool; -use crate::util::constraints::UpsertOptions; use crate::util::{ constraints, on_conflict::OnConflict, @@ -88,7 +87,7 @@ impl DuckDBTableWriterBuilder { } #[must_use] - pub fn with_table_definition(mut self, table_definition: Arc) -> Self { + pub fn with_table_definition(mut self, table_definition: TableDefinition) -> Self { self.table_definition = Some(table_definition); self } @@ -123,7 +122,7 @@ impl DuckDBTableWriterBuilder { Ok(DuckDBTableWriter { read_provider, on_conflict: self.on_conflict, - table_definition, + table_definition: Arc::new(table_definition), pool, on_data_written: self.on_data_written, }) @@ -157,6 +156,12 @@ impl std::fmt::Debug for DuckDBTableWriter { } } +impl std::fmt::Debug for DuckDBTableWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "DuckDBTableWriter") + } +} + impl DuckDBTableWriter { #[must_use] pub fn pool(&self) -> Arc { @@ -214,7 +219,7 @@ impl TableProvider for DuckDBTableWriter { &self, _state: &dyn Session, input: Arc, - overwrite: InsertOp, + op: InsertOp, ) -> datafusion::error::Result> { let mut sink = DuckDBDataSink::new( Arc::clone(&self.pool), @@ -302,47 +307,35 @@ impl 
DataSink for DuckDBDataSink { Ok(num_rows) }); - let upsert_options = self - .on_conflict - .as_ref() - .map_or_else(UpsertOptions::default, |conflict| { - conflict.get_upsert_options() - }); - while let Some(batch) = data.next().await { let batch = batch.map_err(check_and_mark_retriable_error)?; - let batches = if let Some(constraints) = self.table_definition.constraints() { + if let Some(constraints) = self.table_definition.constraints() { constraints::validate_batch_with_constraints( - vec![batch], + std::slice::from_ref(&batch), constraints, - &upsert_options, ) .await .context(super::ConstraintViolationSnafu) - .map_err(to_datafusion_error)? - } else { - vec![batch] - }; - - for batch in batches { - if let Err(send_error) = batch_tx.send(batch).await { - match duckdb_write_handle.await { - Err(join_error) => { - return Err(DataFusionError::Execution(format!( - "Error writing to DuckDB: {join_error}" - ))); - } - Ok(Err(datafusion_error)) => { - return Err(datafusion_error); - } - _ => { - return Err(DataFusionError::Execution(format!( - "Unable to send RecordBatch to DuckDB writer: {send_error}" - ))) - } - }; - } + .map_err(to_datafusion_error)?; + } + + if let Err(send_error) = batch_tx.send(batch).await { + match duckdb_write_handle.await { + Err(join_error) => { + return Err(DataFusionError::Execution(format!( + "Error writing to DuckDB: {join_error}" + ))); + } + Ok(Err(datafusion_error)) => { + return Err(datafusion_error); + } + _ => { + return Err(DataFusionError::Execution(format!( + "Unable to send RecordBatch to DuckDB writer: {send_error}" + ))) + } + }; } } @@ -763,8 +756,7 @@ impl RecordBatchReader for RecordBatchReaderFromStream { #[cfg(test)] mod test { use arrow::array::{Int64Array, StringArray}; - use datafusion::datasource::sink::DataSink; - use datafusion_physical_plan::memory::MemoryStream; + use datafusion::physical_plan::memory::MemoryStream; use super::*; use crate::{ diff --git a/core/src/flight.rs b/core/src/flight.rs index 5815b583..7e532a8d 100644 --- a/core/src/flight.rs +++ b/core/src/flight.rs @@ -20,27 +20,30 @@ use std::any::Any; use std::collections::HashMap; +use std::error::Error; use std::fmt::Debug; use std::sync::Arc; use crate::flight::exec::FlightExec; use arrow_flight::error::FlightError; use arrow_flight::FlightInfo; -use arrow_schema::SchemaRef; use async_trait::async_trait; +use datafusion::arrow::datatypes::SchemaRef; use datafusion::catalog::{Session, TableProviderFactory}; use datafusion::common::stats::Precision; use datafusion::common::{DataFusionError, Statistics}; use datafusion::datasource::TableProvider; +use datafusion::logical_expr::{CreateExternalTable, Expr, TableType}; use datafusion::physical_plan::ExecutionPlan; -use datafusion_expr::{CreateExternalTable, Expr, TableType}; use serde::{Deserialize, Serialize}; -use tonic::transport::Channel; +use tonic::transport::{Channel, ClientTlsConfig}; pub mod codec; mod exec; pub mod sql; +pub use exec::enforce_schema; + /// Generic Arrow Flight data source. Requires a [FlightDriver] that allows implementors /// to integrate any custom Flight RPC service by producing a [FlightMetadata] for some DDL. 
/// @@ -80,7 +83,7 @@ pub mod sql; /// CustomFlightDriver::default(), /// ))), /// ); -/// _ = ctx.sql( +/// let _ = ctx.sql( /// r#" /// CREATE EXTERNAL TABLE custom_flight_table STORED AS CUSTOM_FLIGHT /// LOCATION 'https://custom.flight.rpc' @@ -107,28 +110,31 @@ impl FlightTableFactory { options: HashMap, ) -> datafusion::common::Result { let origin = entry_point.into(); - let channel = Channel::from_shared(origin.clone()) - .unwrap() - .connect() - .await - .map_err(|e| DataFusionError::External(Box::new(e)))?; + let channel = flight_channel(&origin).await?; let metadata = self .driver .metadata(channel.clone(), &options) .await - .map_err(|e| DataFusionError::External(Box::new(e)))?; + .map_err(to_df_err)?; let num_rows = precision(metadata.info.total_records); let total_byte_size = precision(metadata.info.total_bytes); - let logical_schema = metadata.schema; + let logical_schema = metadata.schema.clone(); let stats = Statistics { num_rows, total_byte_size, column_statistics: vec![], }; + let metadata_supplier = if metadata.props.reusable_flight_info { + MetadataSupplier::Reusable(Arc::new(metadata)) + } else { + MetadataSupplier::Refresh { + driver: self.driver.clone(), + channel, + options, + } + }; Ok(FlightTable { - driver: self.driver.clone(), - channel, - options, + metadata_supplier, origin, logical_schema, stats, @@ -136,14 +142,6 @@ impl FlightTableFactory { } } -fn precision(total: i64) -> Precision { - if total < 0 { - Precision::Absent - } else { - Precision::Exact(total as usize) - } -} - #[async_trait] impl TableProviderFactory for FlightTableFactory { async fn create( @@ -177,30 +175,26 @@ pub trait FlightDriver: Sync + Send + Debug { pub struct FlightMetadata { /// FlightInfo object produced by the driver info: FlightInfo, - /// Arrow schema. Can be enforced by the driver or inferred from the FlightInfo - schema: SchemaRef, /// Various knobs that control execution props: FlightProperties, + /// Arrow schema. Can be enforced by the driver or inferred from the FlightInfo + schema: SchemaRef, } impl FlightMetadata { /// Customize everything that is in the driver's control - pub fn new(info: FlightInfo, schema: SchemaRef, props: FlightProperties) -> Self { + pub fn new(info: FlightInfo, props: FlightProperties, schema: SchemaRef) -> Self { Self { info, - schema, props, + schema, } } - /// Customize gRPC headers - pub fn try_new( - info: FlightInfo, - grpc_headers: HashMap, - ) -> arrow_flight::error::Result { + /// Customize flight properties and try to use the FlightInfo schema + pub fn try_new(info: FlightInfo, props: FlightProperties) -> arrow_flight::error::Result { let schema = Arc::new(info.clone().try_decode_schema()?); - let props = grpc_headers.into(); - Ok(Self::new(info, schema, props)) + Ok(Self::new(info, props, schema)) } } @@ -208,38 +202,105 @@ impl TryFrom for FlightMetadata { type Error = FlightError; fn try_from(info: FlightInfo) -> Result { - Self::try_new(info, HashMap::default()) + Self::try_new(info, FlightProperties::default()) } } /// Meant to gradually encapsulate all sorts of knobs required /// for controlling the protocol and query execution details. 
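Editor's note: since `FlightMetadata::try_new` now takes a `FlightProperties` value instead of a plain gRPC header map, custom `FlightDriver` implementations need a small adjustment when building their metadata. A minimal sketch of what that might look like under the new builder API introduced below (the import paths, header value, and size limits are illustrative, not part of this patch):

```rust
use std::collections::HashMap;

use arrow_flight::FlightInfo;
use datafusion_table_providers::flight::{FlightMetadata, FlightProperties, SizeLimits};

fn metadata_from_info(info: FlightInfo) -> arrow_flight::error::Result<FlightMetadata> {
    // Assemble the execution knobs through the builder instead of a raw header map.
    let props = FlightProperties::new()
        .with_grpc_headers(HashMap::from([(
            "authorization".to_string(),
            "Bearer <token>".to_string(), // placeholder value
        )]))
        .with_size_limits(SizeLimits::new(16 * 1024 * 1024, 64 * 1024 * 1024))
        .with_reusable_flight_info(true);

    // `try_new` still decodes the Arrow schema embedded in the FlightInfo.
    FlightMetadata::try_new(info, props)
}
```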
-#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] pub struct FlightProperties { - unbounded_stream: bool, + unbounded_streams: bool, grpc_headers: HashMap, + size_limits: SizeLimits, + reusable_flight_info: bool, } impl FlightProperties { - pub fn new(unbounded_stream: bool, grpc_headers: HashMap) -> Self { + pub fn new() -> Self { + Default::default() + } + + /// Whether the service will produce infinite streams + pub fn with_unbounded_streams(mut self, unbounded_streams: bool) -> Self { + self.unbounded_streams = unbounded_streams; + self + } + + /// gRPC headers to use on subsequent calls. + pub fn with_grpc_headers(mut self, grpc_headers: HashMap) -> Self { + self.grpc_headers = grpc_headers; + self + } + + /// Max sizes in bytes for encoded/decoded gRPC messages. + pub fn with_size_limits(mut self, size_limits: SizeLimits) -> Self { + self.size_limits = size_limits; + self + } + + /// Whether the FlightInfo objects produced by the service can be used multiple times + /// or need to be refreshed before every table scan. + pub fn with_reusable_flight_info(mut self, reusable_flight_info: bool) -> Self { + self.reusable_flight_info = reusable_flight_info; + self + } +} + +/// Message size limits to be passed to the underlying gRPC library. +#[derive(Copy, Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SizeLimits { + encoding: usize, + decoding: usize, +} + +impl SizeLimits { + pub fn new(encoding: usize, decoding: usize) -> Self { + Self { encoding, decoding } + } +} + +impl Default for SizeLimits { + fn default() -> Self { Self { - unbounded_stream, - grpc_headers, + // no limits + encoding: usize::MAX, + decoding: usize::MAX, } } } -impl From> for FlightProperties { - fn from(grpc_headers: HashMap) -> Self { - Self::new(false, grpc_headers) +#[derive(Clone, Debug)] +enum MetadataSupplier { + Reusable(Arc), + Refresh { + driver: Arc, + channel: Channel, + options: HashMap, + }, +} + +impl MetadataSupplier { + async fn flight_metadata(&self) -> datafusion::common::Result> { + match self { + Self::Reusable(metadata) => Ok(metadata.clone()), + Self::Refresh { + driver, + channel, + options, + } => Ok(Arc::new( + driver + .metadata(channel.clone(), options) + .await + .map_err(to_df_err)?, + )), + } } } /// Table provider that wraps a specific flight from an Arrow Flight service pub struct FlightTable { - driver: Arc, - channel: Channel, - options: HashMap, + metadata_supplier: MetadataSupplier, origin: String, logical_schema: SchemaRef, stats: Statistics, @@ -250,7 +311,6 @@ impl std::fmt::Debug for FlightTable { f.debug_struct("FlightTable") .field("origin", &self.origin) .field("logical_schema", &self.logical_schema) - .field("options", &self.options) .field("stats", &self.stats) .finish() } @@ -277,13 +337,9 @@ impl TableProvider for FlightTable { _filters: &[Expr], _limit: Option, ) -> datafusion::common::Result> { - let metadata = self - .driver - .metadata(self.channel.clone(), &self.options) - .await - .map_err(|e| DataFusionError::External(Box::new(e)))?; + let metadata = self.metadata_supplier.flight_metadata().await?; Ok(Arc::new(FlightExec::try_new( - metadata, + metadata.as_ref(), projection, &self.origin, )?)) @@ -293,3 +349,26 @@ impl TableProvider for FlightTable { Some(self.stats.clone()) } } + +fn to_df_err(err: E) -> DataFusionError { + DataFusionError::External(Box::new(err)) +} + +async fn flight_channel(source: impl Into) -> datafusion::common::Result { + let 
tls_config = ClientTlsConfig::new().with_enabled_roots(); + Channel::from_shared(source.into()) + .map_err(to_df_err)? + .tls_config(tls_config) + .map_err(to_df_err)? + .connect() + .await + .map_err(to_df_err) +} + +fn precision(total: i64) -> Precision { + if total < 0 { + Precision::Absent + } else { + Precision::Exact(total as usize) + } +} diff --git a/core/src/flight/codec.rs b/core/src/flight/codec.rs index 4ba5fae3..11b43d91 100644 --- a/core/src/flight/codec.rs +++ b/core/src/flight/codec.rs @@ -20,9 +20,10 @@ use std::sync::Arc; use crate::flight::exec::{FlightConfig, FlightExec}; +use crate::flight::to_df_err; use datafusion::common::DataFusionError; -use datafusion_expr::registry::FunctionRegistry; -use datafusion_physical_plan::ExecutionPlan; +use datafusion::logical_expr::registry::FunctionRegistry; +use datafusion::physical_plan::ExecutionPlan; use datafusion_proto::physical_plan::PhysicalExtensionCodec; /// Physical extension codec for FlightExec @@ -37,8 +38,7 @@ impl PhysicalExtensionCodec for FlightPhysicalCodec { _registry: &dyn FunctionRegistry, ) -> datafusion::common::Result> { if inputs.is_empty() { - let config: FlightConfig = - serde_json::from_slice(buf).map_err(|e| DataFusionError::External(Box::new(e)))?; + let config: FlightConfig = serde_json::from_slice(buf).map_err(to_df_err)?; Ok(Arc::from(FlightExec::from(config))) } else { Err(DataFusionError::Internal( @@ -53,8 +53,7 @@ impl PhysicalExtensionCodec for FlightPhysicalCodec { buf: &mut Vec, ) -> datafusion::common::Result<()> { if let Some(flight) = node.as_any().downcast_ref::() { - let mut bytes = serde_json::to_vec(flight.config()) - .map_err(|e| DataFusionError::External(Box::new(e)))?; + let mut bytes = serde_json::to_vec(flight.config()).map_err(to_df_err)?; buf.append(&mut bytes); Ok(()) } else { diff --git a/core/src/flight/exec.rs b/core/src/flight/exec.rs index 0e89c6b8..e1a60d27 100644 --- a/core/src/flight/exec.rs +++ b/core/src/flight/exec.rs @@ -23,23 +23,26 @@ use std::fmt::{Debug, Formatter}; use std::str::FromStr; use std::sync::Arc; -use crate::flight::{FlightMetadata, FlightProperties}; -use arrow_array::RecordBatch; +use crate::flight::{flight_channel, to_df_err, FlightMetadata, FlightProperties, SizeLimits}; +use crate::sql::db_connection_pool::runtime::run_async_with_tokio; use arrow_flight::error::FlightError; +use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::{FlightClient, FlightEndpoint, Ticket}; -use arrow_schema::SchemaRef; +use datafusion::arrow::array::{new_null_array, ArrayRef, RecordBatch}; +use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::ToByteSlice; +use datafusion::arrow::datatypes::{Field, SchemaRef}; +use datafusion::arrow::error::ArrowError; use datafusion::common::Result; use datafusion::common::{project_schema, DataFusionError}; use datafusion::execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; -use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; -use datafusion_physical_plan::stream::RecordBatchStreamAdapter; -use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; use 
futures::{StreamExt, TryStreamExt}; use serde::{Deserialize, Serialize}; use tonic::metadata::{AsciiMetadataKey, MetadataMap}; -use tonic::transport::Channel; /// Arrow Flight physical plan that maps flight endpoints to partitions #[derive(Clone, Debug)] @@ -53,7 +56,7 @@ impl FlightExec { /// Creates a FlightExec with the provided [FlightMetadata] /// and origin URL (used as fallback location as per the protocol spec). pub fn try_new( - metadata: FlightMetadata, + metadata: &FlightMetadata, projection: Option<&Vec>, origin: &str, ) -> Result { @@ -68,7 +71,7 @@ impl FlightExec { origin: origin.into(), schema, partitions, - properties: metadata.props, + properties: metadata.props.clone(), }; Ok(config.into()) } @@ -80,7 +83,7 @@ impl FlightExec { impl From for FlightExec { fn from(config: FlightConfig) -> Self { - let boundedness = if config.properties.unbounded_stream { + let exec_mode = if config.properties.unbounded_streams { Boundedness::Unbounded { requires_infinite_memory: false, } @@ -91,7 +94,7 @@ impl From for FlightExec { EquivalenceProperties::new(config.schema.clone()), Partitioning::UnknownPartitioning(config.partitions.len()), EmissionType::Incremental, - boundedness, + exec_mode, ); let mut mm = MetadataMap::new(); for (k, v) in config.properties.grpc_headers.iter() { @@ -166,21 +169,31 @@ impl FlightPartition { } } +async fn flight_client( + source: impl Into, + grpc_headers: &MetadataMap, + size_limits: &SizeLimits, +) -> Result { + let channel = flight_channel(source).await?; + let inner_client = FlightServiceClient::new(channel) + .max_encoding_message_size(size_limits.encoding) + .max_decoding_message_size(size_limits.decoding); + let mut client = FlightClient::new_from_inner(inner_client); + client.metadata_mut().clone_from(grpc_headers); + Ok(client) +} + async fn flight_stream( partition: FlightPartition, schema: SchemaRef, grpc_headers: Arc, + size_limits: SizeLimits, ) -> Result { let mut errors: Vec> = vec![]; for loc in partition.locations.iter() { - match try_fetch_stream( - loc, - partition.ticket.clone(), - schema.clone(), - grpc_headers.clone(), - ) - .await - { + let get_client = || async { flight_client(loc, grpc_headers.as_ref(), &size_limits).await }; + let client = run_async_with_tokio(get_client).await?; + match try_fetch_stream(client, &partition.ticket, schema.clone()).await { Ok(stream) => return Ok(stream), Err(e) => errors.push(Box::new(e)), } @@ -195,48 +208,63 @@ async fn flight_stream( } async fn try_fetch_stream( - source: impl Into, - ticket: FlightTicket, + mut client: FlightClient, + ticket: &FlightTicket, schema: SchemaRef, - grpc_headers: Arc, ) -> arrow_flight::error::Result { let ticket = Ticket::new(ticket.0.to_vec()); - let channel = Channel::from_shared(source.into()) - .map_err(|e| FlightError::ExternalError(Box::new(e)))? 
- .connect() - .await - .map_err(|e| FlightError::ExternalError(Box::new(e)))?; - let mut client = FlightClient::new(channel); - client.metadata_mut().clone_from(grpc_headers.as_ref()); - let stream = client.do_get(ticket).await?; + let stream = client.do_get(ticket).await?.map_err(to_df_err); Ok(Box::pin(RecordBatchStreamAdapter::new( schema.clone(), - stream.map(move |rb| { - let schema = schema.clone(); - rb.map(move |rb| { - if schema.fields.is_empty() || rb.schema() == schema { - rb - } else if schema.contains(rb.schema_ref()) { - rb.with_schema(schema.clone()).unwrap() - } else { - let columns = schema - .fields - .iter() - .map(|field| { - rb.column_by_name(field.name()) - .expect("missing fields in record batch") - .clone() - }) - .collect(); - RecordBatch::try_new(schema.clone(), columns) - .expect("cannot impose desired schema on record batch") - } - }) - .map_err(|e| DataFusionError::External(Box::new(e))) - }), + stream.map(move |item| item.and_then(|rb| enforce_schema(rb, &schema).map_err(Into::into))), ))) } +/// Best-effort morphing of a record batch into a new schema. +/// - drops columns if the corresponding field is missing from the target schema +/// - attempts a column-wise cast if data types are different +/// - generates missing columns as all-nulls if the corresponding field is nullable +pub fn enforce_schema( + batch: RecordBatch, + target_schema: &SchemaRef, +) -> datafusion::arrow::error::Result { + if target_schema.fields.is_empty() || batch.schema() == *target_schema { + Ok(batch) + } else if target_schema.contains(batch.schema_ref()) { + batch.with_schema(target_schema.to_owned()) + } else { + let columns = target_schema + .fields + .iter() + .map(|field| find_matching_column(&batch, field.as_ref())) + .collect::>()?; + RecordBatch::try_new(target_schema.to_owned(), columns) + } +} + +/// For a target schema field, extract the column with the same name if present in the +/// record batch and cast it to the desired data type if needed. If the column is missing +/// but the target schema field is nullable, generates a null-array column. 
+fn find_matching_column( + batch: &RecordBatch, + field: &Field, +) -> datafusion::arrow::error::Result { + if let Some(column) = batch.column_by_name(field.name()) { + if column.data_type() == field.data_type() { + Ok(column.to_owned()) + } else { + cast(column.as_ref(), field.data_type()) + } + } else if field.is_nullable() { + Ok(new_null_array(field.data_type(), batch.num_rows())) + } else { + Err(ArrowError::SchemaError(format!( + "Required field `{}` is missing from the original record batch", + field.name() + ))) + } +} + impl DisplayAs for FlightExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { match t { @@ -288,6 +316,7 @@ impl ExecutionPlan for FlightExec { self.config.partitions[partition].clone(), self.schema(), self.metadata_map.clone(), + self.config.properties.size_limits, ); let stream = futures::stream::once(future_stream).try_flatten(); Ok(Box::pin(RecordBatchStreamAdapter::new( @@ -299,9 +328,12 @@ impl ExecutionPlan for FlightExec { #[cfg(test)] mod tests { - use crate::flight::exec::{FlightConfig, FlightPartition, FlightTicket}; - use crate::flight::FlightProperties; - use arrow_schema::{DataType, Field, Schema}; + use crate::flight::exec::{enforce_schema, FlightConfig, FlightPartition, FlightTicket}; + use crate::flight::{FlightProperties, SizeLimits}; + use datafusion::arrow::array::{ + BooleanArray, Float32Array, Int32Array, RecordBatch, StringArray, StructArray, + }; + use datafusion::arrow::datatypes::{DataType, Field, Fields, Schema}; use std::collections::HashMap; use std::sync::Arc; @@ -322,10 +354,13 @@ mod tests { }, ] .into(); - let properties = FlightProperties::new( - true, - HashMap::from([("h1".into(), "v1".into()), ("h2".into(), "v2".into())]), - ); + let properties = FlightProperties::default() + .with_unbounded_streams(true) + .with_grpc_headers(HashMap::from([ + ("h1".into(), "v1".into()), + ("h2".into(), "v2".into()), + ])) + .with_size_limits(SizeLimits::new(1024, 1024)); let config = FlightConfig { origin: "http://localhost:50050".into(), schema, @@ -336,4 +371,101 @@ mod tests { let restored = serde_json::from_slice(json.as_slice()).expect("cannot decode json config"); assert_eq!(config, restored); } + + #[test] + fn test_schema_enforcement() { + let data = StructArray::new( + Fields::from(vec![ + Arc::new(Field::new("f_int", DataType::Int32, true)), + Arc::new(Field::new("f_bool", DataType::Boolean, false)), + ]), + vec![ + Arc::new(Int32Array::from(vec![10, 20])), + Arc::new(BooleanArray::from(vec![true, false])), + ], + None, + ); + let input_rb = RecordBatch::from(data); + + let empty_schema = Arc::new(Schema::empty()); + let same_rb = + enforce_schema(input_rb.clone(), &empty_schema).expect("error enforcing empty schema"); + assert_eq!(input_rb, same_rb); + + let coerced_rb = enforce_schema( + input_rb.clone(), + &Arc::new(Schema::new(vec![ + // compatible yet different types with flipped nullability + Arc::new(Field::new("f_int", DataType::Float32, false)), + Arc::new(Field::new("f_bool", DataType::Utf8, true)), + ])), + ) + .expect("error enforcing a compatible schema"); + assert_ne!(input_rb, coerced_rb); + assert_eq!(coerced_rb.num_columns(), 2); + assert_eq!(coerced_rb.num_rows(), 2); + assert_eq!( + coerced_rb.column(0).as_ref(), + &Float32Array::from(vec![10.0, 20.0]) + ); + assert_eq!( + coerced_rb.column(1).as_ref(), + &StringArray::from(vec!["true", "false"]) + ); + + let projection_rb = enforce_schema( + input_rb.clone(), + &Arc::new(Schema::new(vec![ + // keep only the first column and make 
it non-nullable int16 + Arc::new(Field::new("f_int", DataType::Int16, false)), + ])), + ) + .expect("error enforcing a compatible subset of fields"); + assert_eq!(projection_rb.num_columns(), 1); + assert_eq!(projection_rb.num_rows(), 2); + assert_eq!(projection_rb.schema().fields().len(), 1); + assert_eq!(projection_rb.schema().fields()[0].name(), "f_int"); + + let incompatible_schema_attempt = enforce_schema( + input_rb.clone(), + &Arc::new(Schema::new(vec![ + Arc::new(Field::new("f_int", DataType::Float32, true)), + Arc::new(Field::new("f_bool", DataType::Date32, false)), + ])), + ) + .expect_err("got successful result for incompatible schema"); + assert_eq!( + incompatible_schema_attempt.to_string(), + "Cast error: Casting from Boolean to Date32 not supported" + ); + + let enhanced_with_null_column = enforce_schema( + input_rb.clone(), + &Arc::new(Schema::new(vec![ + Arc::new(Field::new("f_int", DataType::Int32, true)), + Arc::new(Field::new("f_bool", DataType::Boolean, false)), + Arc::new(Field::new("f_extra", DataType::Utf8, true)), + ])), + ) + .expect("error adding a nullable extra field"); + assert_eq!(enhanced_with_null_column.num_columns(), 3); + assert_eq!( + enhanced_with_null_column.column(2).as_ref(), + &StringArray::new_null(2) + ); + + let non_nullable_extra_field_attempt = enforce_schema( + input_rb.clone(), + &Arc::new(Schema::new(vec![ + Arc::new(Field::new("f_int", DataType::Int32, true)), + Arc::new(Field::new("f_bool", DataType::Boolean, false)), + Arc::new(Field::new("f_extra", DataType::Utf8, false)), + ])), + ) + .expect_err("got successful result for missing non-nullable field"); + assert_eq!( + non_nullable_extra_field_attempt.to_string(), + "Schema error: Required field `f_extra` is missing from the original record batch" + ); + } } diff --git a/core/src/flight/sql.rs b/core/src/flight/sql.rs index 73a4149d..2d9cd5fa 100644 --- a/core/src/flight/sql.rs +++ b/core/src/flight/sql.rs @@ -18,24 +18,13 @@ //! Default [FlightDriver] for Flight SQL use std::collections::HashMap; -use std::str::FromStr; use arrow_flight::error::Result; -use arrow_flight::flight_service_client::FlightServiceClient; -use arrow_flight::sql::{CommandStatementQuery, ProstMessageExt}; -use arrow_flight::{FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse}; -use arrow_schema::ArrowError; +use arrow_flight::sql::client::FlightSqlServiceClient; use async_trait::async_trait; -use base64::prelude::BASE64_STANDARD; -use base64::Engine; -use bytes::Bytes; -use futures::{stream, TryStreamExt}; -use prost::Message; -use tonic::metadata::AsciiMetadataKey; use tonic::transport::Channel; -use tonic::IntoRequest; -use crate::flight::{FlightDriver, FlightMetadata}; +use crate::flight::{FlightDriver, FlightMetadata, FlightProperties}; pub const QUERY: &str = "flight.sql.query"; pub const USERNAME: &str = "flight.sql.username"; @@ -51,7 +40,31 @@ pub const HEADER_PREFIX: &str = "flight.sql.header."; /// stored as a gRPC authorization header within the returned [FlightMetadata], /// to be sent with the subsequent `DoGet` requests. #[derive(Clone, Debug, Default)] -pub struct FlightSqlDriver {} +pub struct FlightSqlDriver { + properties_template: FlightProperties, + persistent_headers: bool, +} + +impl FlightSqlDriver { + pub fn new() -> Self { + Default::default() + } + + /// Custom flight properties to be returned from the metadata call instead of the default ones. + /// The headers (if any) will only be used for the Handshake/GetFlightInfo calls by default. 
+ /// This behaviour can be changed by calling [Self::with_persistent_headers] below. + /// Headers provided as options for the metadata call will overwrite the template ones. + pub fn with_properties_template(mut self, properties_template: FlightProperties) -> Self { + self.properties_template = properties_template; + self + } + + /// Propagate the static headers configured for Handshake/GetFlightInfo to the subsequent DoGet calls. + pub fn with_persistent_headers(mut self, persistent_headers: bool) -> Self { + self.persistent_headers = persistent_headers; + self + } +} #[async_trait] impl FlightDriver for FlightSqlDriver { @@ -60,162 +73,34 @@ impl FlightDriver for FlightSqlDriver { channel: Channel, options: &HashMap, ) -> Result { - let mut client = FlightSqlClient::new(channel); - let headers = options.iter().filter_map(|(key, value)| { + let mut client = FlightSqlServiceClient::new(channel); + let mut handshake_headers = self.properties_template.grpc_headers.clone(); + let headers_overlay = options.iter().filter_map(|(key, value)| { key.strip_prefix(HEADER_PREFIX) - .map(|header_name| (header_name, value)) + .map(|header_name| (header_name.to_owned(), value.to_owned())) }); - for header in headers { - client.set_header(header.0, header.1) + handshake_headers.extend(headers_overlay); + for (name, value) in &handshake_headers { + client.set_header(name, value) } if let Some(username) = options.get(USERNAME) { let default_password = "".to_string(); let password = options.get(PASSWORD).unwrap_or(&default_password); - _ = client.handshake(username, password).await?; + client.handshake(username, password).await.ok(); } let info = client.execute(options[QUERY].clone(), None).await?; - let mut grpc_headers = HashMap::default(); - if let Some(token) = client.token { - grpc_headers.insert("authorization".into(), format!("Bearer {}", token)); - } - FlightMetadata::try_new(info, grpc_headers) - } -} - -///////////////////////////////////////////////////////////////////////// -// Shameless copy/paste from arrow-flight FlightSqlServiceClient -// (only cherry-picked the functionality that we actually use). -// This is only needed in order to access the bearer token received -// during handshake, as the standard client does not expose this information. -// The bearer token has to be passed to the clients that perform -// the DoGet operation, since Dremio, Ballista and possibly others -// expect the bearer token they produce with the handshake response -// to be set on all subsequent requests, including DoGet. -// -// TODO: remove this and switch to the official client once -// https://github.com/apache/arrow-rs/pull/6254 is released, -// and remove a bunch of cargo dependencies, like base64 or bytes -#[derive(Debug, Clone)] -struct FlightSqlClient { - token: Option, - headers: HashMap, - flight_client: FlightServiceClient, -} - -impl FlightSqlClient { - /// Creates a new FlightSql client that connects to a server over an arbitrary tonic `Channel` - fn new(channel: Channel) -> Self { - Self { - token: None, - flight_client: FlightServiceClient::new(channel), - headers: HashMap::default(), - } - } - - /// Perform a `handshake` with the server, passing credentials and establishing a session. - /// - /// If the server returns an "authorization" header, it is automatically parsed and set as - /// a token for future requests. Any other data returned by the server in the handshake - /// response is returned as a binary blob. 
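Editor's note: the reworked `FlightSqlDriver` is configured through the same builder pattern. A hedged sketch of how it could be wired into a session, assuming `FlightTableFactory::new` takes the driver as in the module-level example in `flight.rs` (the `FLIGHT_SQL` keyword, header name, and registration call mirror that example and are not prescribed by this patch):

```rust
use std::collections::HashMap;
use std::sync::Arc;

use datafusion::prelude::SessionContext;
use datafusion_table_providers::flight::sql::FlightSqlDriver;
use datafusion_table_providers::flight::{FlightProperties, FlightTableFactory};

fn register_flight_sql(ctx: &SessionContext) {
    // Headers from the template are sent on Handshake/GetFlightInfo; enabling
    // persistent headers also forwards them to the partition-level DoGet calls.
    let driver = FlightSqlDriver::new()
        .with_properties_template(FlightProperties::new().with_grpc_headers(
            HashMap::from([("x-tenant".to_string(), "demo".to_string())]),
        ))
        .with_persistent_headers(true);

    ctx.state_ref().write().table_factories_mut().insert(
        "FLIGHT_SQL".into(),
        Arc::new(FlightTableFactory::new(Arc::new(driver))),
    );
}
```

After registration, a statement such as `CREATE EXTERNAL TABLE t STORED AS FLIGHT_SQL LOCATION '...' OPTIONS ('flight.sql.query' 'SELECT ...')` should resolve through this driver, with the query passed via the `flight.sql.query` option shown above.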
- async fn handshake( - &mut self, - username: &str, - password: &str, - ) -> std::result::Result { - let cmd = HandshakeRequest { - protocol_version: 0, - payload: Default::default(), - }; - let mut req = tonic::Request::new(stream::iter(vec![cmd])); - let val = BASE64_STANDARD.encode(format!("{username}:{password}")); - let val = format!("Basic {val}") - .parse() - .map_err(|_| ArrowError::ParseError("Cannot parse header".to_string()))?; - req.metadata_mut().insert("authorization", val); - let req = self.set_request_headers(req)?; - let resp = self - .flight_client - .handshake(req) - .await - .map_err(|e| ArrowError::IpcError(format!("Can't handshake {e}")))?; - if let Some(auth) = resp.metadata().get("authorization") { - let auth = auth - .to_str() - .map_err(|_| ArrowError::ParseError("Can't read auth header".to_string()))?; - let bearer = "Bearer "; - if !auth.starts_with(bearer) { - Err(ArrowError::ParseError("Invalid auth header!".to_string()))?; - } - let auth = auth[bearer.len()..].to_string(); - self.token = Some(auth); - } - let responses: Vec = resp - .into_inner() - .try_collect() - .await - .map_err(|_| ArrowError::ParseError("Can't collect responses".to_string()))?; - let resp = match responses.as_slice() { - [resp] => resp.payload.clone(), - [] => Bytes::new(), - _ => Err(ArrowError::ParseError( - "Multiple handshake responses".to_string(), - ))?, + let mut partition_headers = if self.persistent_headers { + handshake_headers + } else { + HashMap::default() }; - Ok(resp) - } - - async fn execute( - &mut self, - query: String, - transaction_id: Option, - ) -> std::result::Result { - let cmd = CommandStatementQuery { - query, - transaction_id, - }; - self.get_flight_info_for_command(cmd).await - } - - async fn get_flight_info_for_command( - &mut self, - cmd: M, - ) -> std::result::Result { - let descriptor = FlightDescriptor::new_cmd(cmd.as_any().encode_to_vec()); - let req = self.set_request_headers(descriptor.into_request())?; - let fi = self - .flight_client - .get_flight_info(req) - .await - .map_err(|status| ArrowError::IpcError(format!("{status:?}")))? 
- .into_inner(); - Ok(fi) - } - - fn set_header(&mut self, key: impl Into, value: impl Into) { - let key: String = key.into(); - let value: String = value.into(); - self.headers.insert(key, value); - } - - fn set_request_headers( - &self, - mut req: tonic::Request, - ) -> std::result::Result, ArrowError> { - for (k, v) in &self.headers { - let k = AsciiMetadataKey::from_str(k.as_str()).map_err(|e| { - ArrowError::ParseError(format!("Cannot convert header key \"{k}\": {e}")) - })?; - let v = v.parse().map_err(|e| { - ArrowError::ParseError(format!("Cannot convert header value \"{v}\": {e}")) - })?; - req.metadata_mut().insert(k, v); - } - if let Some(token) = &self.token { - let val = format!("Bearer {token}").parse().map_err(|e| { - ArrowError::ParseError(format!("Cannot convert token to header value: {e}")) - })?; - req.metadata_mut().insert("authorization", val); + if let Some(token) = client.token() { + partition_headers.insert("authorization".into(), format!("Bearer {token}")); } - Ok(req) + let props = self + .properties_template + .clone() + .with_grpc_headers(partition_headers); + FlightMetadata::try_new(info, props) } } diff --git a/core/src/lib.rs b/core/src/lib.rs index a87f2304..3c9d9296 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -1,9 +1,14 @@ +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + use serde::{Deserialize, Serialize}; use snafu::prelude::*; +pub mod common; pub mod sql; pub mod util; +#[cfg(feature = "clickhouse")] +pub mod clickhouse; #[cfg(feature = "duckdb")] pub mod duckdb; #[cfg(feature = "flight")] @@ -12,6 +17,8 @@ pub mod flight; pub mod mongodb; #[cfg(feature = "mysql")] pub mod mysql; +#[cfg(feature = "odbc")] +pub mod odbc; #[cfg(feature = "postgres")] pub mod postgres; #[cfg(feature = "sqlite")] diff --git a/core/src/mysql.rs b/core/src/mysql.rs index e8cfe0f7..c9d9b0e7 100644 --- a/core/src/mysql.rs +++ b/core/src/mysql.rs @@ -13,25 +13,98 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - +use crate::mysql::write::MySQLTableWriter; +use crate::sql::arrow_sql_gen::statement::{CreateTableBuilder, IndexBuilder, InsertBuilder}; +use crate::sql::db_connection_pool::dbconnection::mysqlconn::MySQLConnection; +use crate::sql::db_connection_pool::dbconnection::DbConnection; use crate::sql::db_connection_pool::mysqlpool::MySQLConnectionPool; -use crate::sql::sql_provider_datafusion::{self}; -use datafusion::{datasource::TableProvider, sql::TableReference}; -use mysql_async::Metrics; +use crate::sql::db_connection_pool::{self, mysqlpool, DbConnectionPool}; +use crate::sql::sql_provider_datafusion::{self, SqlTable}; +use crate::util::{ + self, column_reference::ColumnReference, constraints::get_primary_keys_from_constraints, + indexes::IndexType, on_conflict::OnConflict, secrets::to_secret_map, to_datafusion_error, +}; +use crate::util::{column_reference, constraints, on_conflict}; +use async_trait::async_trait; +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::catalog::Session; +use datafusion::sql::unparser::dialect::MySqlDialect; +use datafusion::{ + catalog::TableProviderFactory, common::Constraints, datasource::TableProvider, + error::DataFusionError, logical_expr::CreateExternalTable, sql::TableReference, +}; +use mysql_async::prelude::{Queryable, ToValue}; +use mysql_async::{Metrics, TxOpts}; +use sea_query::{Alias, DeleteStatement, MysqlQueryBuilder}; use snafu::prelude::*; use sql_table::MySQLTable; +use std::collections::HashMap; use std::sync::Arc; +pub type DynMySQLConnectionPool = + dyn DbConnectionPool + Send + Sync; + +pub type DynMySQLConnection = dyn DbConnection; + +#[cfg(feature = "mysql-federation")] pub mod federation; pub(crate) mod mysql_window; pub mod sql_table; +pub mod write; #[derive(Debug, Snafu)] pub enum Error { + #[snafu(display("DbConnectionError: {source}"))] + DbConnectionError { + source: db_connection_pool::dbconnection::GenericError, + }, + #[snafu(display("Unable to construct SQL table: {source}"))] UnableToConstructSQLTable { source: sql_provider_datafusion::Error, }, + + #[snafu(display("Unable to delete all data from the MySQL table: {source}"))] + UnableToDeleteAllTableData { source: mysql_async::Error }, + + #[snafu(display("Unable to insert Arrow batch to MySQL table: {source}"))] + UnableToInsertArrowBatch { source: mysql_async::Error }, + + #[snafu(display("Unable to downcast DbConnection to MySQLConnection"))] + UnableToDowncastDbConnection {}, + + #[snafu(display("Unable to begin MySQL transaction: {source}"))] + UnableToBeginTransaction { source: mysql_async::Error }, + + #[snafu(display("Unable to create MySQL connection pool: {source}"))] + UnableToCreateMySQLConnectionPool { source: mysqlpool::Error }, + + #[snafu(display("Unable to create the MySQL table: {source}"))] + UnableToCreateMySQLTable { source: mysql_async::Error }, + + #[snafu(display("Unable to create an index for the MySQL table: {source}"))] + UnableToCreateIndexForMySQLTable { source: mysql_async::Error }, + + #[snafu(display("Unable to commit the MySQL transaction: {source}"))] + UnableToCommitMySQLTransaction { source: mysql_async::Error }, + + #[snafu(display("Unable to create insertion statement for MySQL table: {source}"))] + UnableToCreateInsertStatement { + source: crate::sql::arrow_sql_gen::statement::Error, + }, + + #[snafu(display("The table '{table_name}' doesn't exist in the MySQL server"))] + TableDoesntExist { table_name: String }, + + #[snafu(display("Constraint Violation: 
{source}"))] + ConstraintViolation { source: constraints::Error }, + + #[snafu(display("Error parsing column reference: {source}"))] + UnableToParseColumnReference { source: column_reference::Error }, + + #[snafu(display("Error parsing on_conflict: {source}"))] + UnableToParseOnConflict { source: on_conflict::Error }, } type Result = std::result::Result; @@ -52,11 +125,12 @@ impl MySQLTableFactory { ) -> Result, Box> { let pool = Arc::clone(&self.pool); let table_provider = Arc::new( - MySQLTable::new(&pool, table_reference, None) + MySQLTable::new(&pool, table_reference) .await .map_err(|e| Box::new(e) as Box)?, ); + #[cfg(feature = "mysql-federation")] let table_provider = Arc::new( table_provider .create_federated_table_provider() @@ -66,7 +140,312 @@ impl MySQLTableFactory { Ok(table_provider) } + pub async fn read_write_table_provider( + &self, + table_reference: TableReference, + ) -> Result, Box> { + let read_provider = Self::table_provider(self, table_reference.clone()).await?; + let schema = read_provider.schema(); + + let table_name = table_reference.to_string(); + let mysql = MySQL::new( + table_name, + Arc::clone(&self.pool), + schema, + Constraints::default(), + ); + + Ok(MySQLTableWriter::create(read_provider, mysql, None)) + } + pub fn conn_pool_metrics(&self) -> Arc { self.pool.metrics() } } + +#[derive(Debug)] +pub struct MySQLTableProviderFactory {} + +impl MySQLTableProviderFactory { + #[must_use] + pub fn new() -> Self { + Self {} + } +} + +impl Default for MySQLTableProviderFactory { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl TableProviderFactory for MySQLTableProviderFactory { + async fn create( + &self, + _state: &dyn Session, + cmd: &CreateExternalTable, + ) -> datafusion::common::Result> { + let name = cmd.name.to_string(); + let mut options = cmd.options.clone(); + let schema: Schema = cmd.schema.as_ref().into(); + + let indexes_option_str = options.remove("indexes"); + let unparsed_indexes: HashMap = match indexes_option_str { + Some(indexes_str) => util::hashmap_from_option_string(&indexes_str), + None => HashMap::new(), + }; + + let unparsed_indexes = unparsed_indexes + .into_iter() + .map(|(key, value)| { + let columns = ColumnReference::try_from(key.as_str()) + .context(UnableToParseColumnReferenceSnafu) + .map_err(util::to_datafusion_error); + (columns, value) + }) + .collect::, IndexType)>>(); + + let mut indexes: Vec<(ColumnReference, IndexType)> = Vec::new(); + for (columns, index_type) in unparsed_indexes { + let columns = columns?; + indexes.push((columns, index_type)); + } + + let mut on_conflict: Option = None; + if let Some(on_conflict_str) = options.remove("on_conflict") { + on_conflict = Some( + OnConflict::try_from(on_conflict_str.as_str()) + .context(UnableToParseOnConflictSnafu) + .map_err(util::to_datafusion_error)?, + ); + } + + let params = to_secret_map(options); + + let pool = Arc::new( + MySQLConnectionPool::new(params) + .await + .context(UnableToCreateMySQLConnectionPoolSnafu) + .map_err(to_datafusion_error)?, + ); + let schema = Arc::new(schema); + let mysql = MySQL::new( + name.clone(), + Arc::clone(&pool), + Arc::clone(&schema), + cmd.constraints.clone(), + ); + + let mut db_conn = pool + .connect() + .await + .context(DbConnectionSnafu) + .map_err(to_datafusion_error)?; + + let mysql_conn = MySQL::mysql_conn(&mut db_conn).map_err(to_datafusion_error)?; + let mut conn_guard = mysql_conn.conn.lock().await; + let mut transaction = conn_guard + .start_transaction(TxOpts::default()) + .await + 
.context(UnableToBeginTransactionSnafu) + .map_err(to_datafusion_error)?; + + let primary_keys = get_primary_keys_from_constraints(&cmd.constraints, &schema); + + mysql + .create_table(Arc::clone(&schema), &mut transaction, primary_keys) + .await + .map_err(to_datafusion_error)?; + + for index in indexes { + mysql + .create_index( + &mut transaction, + index.0.iter().collect(), + index.1 == IndexType::Unique, + ) + .await + .map_err(to_datafusion_error)?; + } + + transaction + .commit() + .await + .context(UnableToCommitMySQLTransactionSnafu) + .map_err(to_datafusion_error)?; + + drop(conn_guard); + + let dyn_pool: Arc = pool; + + let read_provider = Arc::new( + SqlTable::new_with_schema( + "mysql", + &dyn_pool, + Arc::clone(&schema), + TableReference::bare(name.clone()), + ) + .with_dialect(Arc::new(MySqlDialect {})), + ); + + #[cfg(feature = "mysql-federation")] + let read_provider = Arc::new(read_provider.create_federated_table_provider()?); + Ok(MySQLTableWriter::create(read_provider, mysql, on_conflict)) + } +} + +#[derive(Debug)] +pub struct MySQL { + table_name: String, + pool: Arc, + schema: SchemaRef, + constraints: Constraints, +} + +impl MySQL { + #[must_use] + pub fn new( + table_name: String, + pool: Arc, + schema: SchemaRef, + constraints: Constraints, + ) -> Self { + Self { + table_name, + pool, + schema, + constraints, + } + } + + #[must_use] + pub fn table_name(&self) -> &str { + &self.table_name + } + + #[must_use] + pub fn constraints(&self) -> &Constraints { + &self.constraints + } + + pub async fn connect(&self) -> Result> { + let mut conn = self.pool.connect().await.context(DbConnectionSnafu)?; + + let mysql_conn = Self::mysql_conn(&mut conn)?; + + if !self.table_exists(mysql_conn).await { + TableDoesntExistSnafu { + table_name: self.table_name.clone(), + } + .fail()?; + } + + Ok(conn) + } + + pub fn mysql_conn(db_connection: &mut Box) -> Result<&mut MySQLConnection> { + let conn = db_connection + .as_any_mut() + .downcast_mut::() + .context(UnableToDowncastDbConnectionSnafu)?; + + Ok(conn) + } + + async fn table_exists(&self, mysql_connection: &MySQLConnection) -> bool { + let sql = format!( + "SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_name = '{name}' + )", + name = self.table_name + ); + tracing::trace!("{sql}"); + let Ok(Some((exists,))) = mysql_connection + .conn + .lock() + .await + .query_first::<(bool,), _>(&sql) + .await + else { + return false; + }; + + exists + } + + async fn insert_batch( + &self, + transaction: &mut mysql_async::Transaction<'_>, + batch: RecordBatch, + on_conflict: Option, + ) -> Result<()> { + let insert_table_builder = + InsertBuilder::new(&TableReference::bare(self.table_name.clone()), vec![batch]); + + let sea_query_on_conflict = + on_conflict.map(|oc| oc.build_sea_query_on_conflict(&self.schema)); + + let sql = insert_table_builder + .build_mysql(sea_query_on_conflict) + .context(UnableToCreateInsertStatementSnafu)?; + + transaction + .exec_drop(&sql, ()) + .await + .context(UnableToInsertArrowBatchSnafu)?; + + Ok(()) + } + + async fn delete_all_table_data( + &self, + transaction: &mut mysql_async::Transaction<'_>, + ) -> Result<()> { + let delete = DeleteStatement::new() + .from_table(Alias::new(self.table_name.clone())) + .to_string(MysqlQueryBuilder); + transaction + .exec_drop(delete.as_str(), ()) + .await + .context(UnableToDeleteAllTableDataSnafu)?; + + Ok(()) + } + + async fn create_table( + &self, + schema: SchemaRef, + transaction: &mut mysql_async::Transaction<'_>, + primary_keys: Vec, + ) -> 
Result<()> { + let create_table_statement = + CreateTableBuilder::new(schema, &self.table_name).primary_keys(primary_keys); + let create_stmts = create_table_statement.build_mysql(); + + transaction + .exec_drop(create_stmts, ()) + .await + .context(UnableToCreateMySQLTableSnafu) + } + + async fn create_index( + &self, + transaction: &mut mysql_async::Transaction<'_>, + columns: Vec<&str>, + unique: bool, + ) -> Result<()> { + let mut index_builder = IndexBuilder::new(&self.table_name, columns); + if unique { + index_builder = index_builder.unique(); + } + let sql = index_builder.build_mysql(); + + transaction + .exec_drop(sql, ()) + .await + .context(UnableToCreateIndexForMySQLTableSnafu) + } +} diff --git a/core/src/mysql/federation.rs b/core/src/mysql/federation.rs index 73ce872c..210b8de7 100644 --- a/core/src/mysql/federation.rs +++ b/core/src/mysql/federation.rs @@ -26,12 +26,12 @@ impl MySQLTable { fn create_federated_table_source( self: Arc, ) -> DataFusionResult> { - let table_name = self.base_table.table_reference.clone(); + let table_reference = self.base_table.table_reference.clone(); let schema = Arc::clone(&Arc::clone(&self).base_table.schema()); let fed_provider = Arc::new(SQLFederationProvider::new(self)); Ok(Arc::new(SQLTableSource::new_with_schema( fed_provider, - RemoteTableRef::from(table_name), + RemoteTableRef::from(table_reference), schema, ))) } @@ -77,7 +77,8 @@ impl SQLExecutor for MySQLTable { } fn ast_analyzer(&self) -> Option { - Some(AstAnalyzer::new(vec![Box::new(mysql_ast_analyzer)])) + let rule = Box::new(mysql_ast_analyzer); + Some(AstAnalyzer::new(vec![rule])) } fn execute( diff --git a/core/src/mysql/mysql_window.rs b/core/src/mysql/mysql_window.rs index 7028e9ad..7d5c698e 100644 --- a/core/src/mysql/mysql_window.rs +++ b/core/src/mysql/mysql_window.rs @@ -1,5 +1,6 @@ -use datafusion::sql::sqlparser::ast::ObjectNamePart; -use datafusion::sql::sqlparser::ast::{Expr, Function, Ident, VisitorMut, WindowType}; +use datafusion::sql::sqlparser::ast::{ + Expr, Function, Ident, ObjectNamePart, VisitorMut, WindowType, +}; use std::ops::ControlFlow; #[derive(PartialEq, Eq)] @@ -76,6 +77,7 @@ mod test { use datafusion::sql::sqlparser::{ self, ast::{self, helpers::attached_token::AttachedToken, ObjectName, WindowFrame}, + tokenizer::{Span, Token, TokenWithSpan}, }; use super::*; @@ -83,13 +85,19 @@ mod test { #[test] fn test_remove_frame_clause() { let mut func = Function { - name: ObjectName(vec![ObjectNamePart::Identifier(Ident::new("RANK"))]), + name: ObjectName(vec![ObjectNamePart::Identifier(Ident { + value: "RANK".to_string(), + quote_style: None, + span: Span::empty(), + })]), args: ast::FunctionArguments::None, over: Some(WindowType::WindowSpec(ast::WindowSpec { window_name: None, partition_by: vec![], order_by: vec![sqlparser::ast::OrderByExpr { - expr: sqlparser::ast::Expr::Wildcard(AttachedToken::empty()), + expr: sqlparser::ast::Expr::Wildcard(AttachedToken(TokenWithSpan::wrap( + Token::Char('*'), + ))), options: sqlparser::ast::OrderByOptions { asc: None, nulls_first: Some(true), @@ -113,7 +121,9 @@ mod test { window_name: None, partition_by: vec![], order_by: vec![sqlparser::ast::OrderByExpr { - expr: sqlparser::ast::Expr::Wildcard(AttachedToken::empty()), + expr: sqlparser::ast::Expr::Wildcard(AttachedToken(TokenWithSpan::wrap( + Token::Char('*'), + ))), options: sqlparser::ast::OrderByOptions { asc: None, nulls_first: Some(true), @@ -131,13 +141,19 @@ mod test { #[test] fn test_remove_nulls_first_last() { let mut func = Function { - name: 
ObjectName(vec![ObjectNamePart::Identifier(Ident::new("RANK"))]), + name: ObjectName(vec![ObjectNamePart::Identifier(Ident { + value: "RANK".to_string(), + quote_style: None, + span: Span::empty(), + })]), args: sqlparser::ast::FunctionArguments::None, over: Some(WindowType::WindowSpec(sqlparser::ast::WindowSpec { window_name: None, partition_by: vec![], order_by: vec![sqlparser::ast::OrderByExpr { - expr: sqlparser::ast::Expr::Wildcard(AttachedToken::empty()), + expr: sqlparser::ast::Expr::Wildcard(AttachedToken(TokenWithSpan::wrap( + Token::Char('*'), + ))), options: sqlparser::ast::OrderByOptions { asc: None, nulls_first: Some(true), @@ -161,7 +177,9 @@ mod test { window_name: None, partition_by: vec![], order_by: vec![sqlparser::ast::OrderByExpr { - expr: sqlparser::ast::Expr::Wildcard(AttachedToken::empty()), + expr: sqlparser::ast::Expr::Wildcard(AttachedToken(TokenWithSpan::wrap( + Token::Char('*'), + ))), options: sqlparser::ast::OrderByOptions { asc: None, nulls_first: None, diff --git a/core/src/mysql/sql_table.rs b/core/src/mysql/sql_table.rs index 56cff3ea..21aba7ba 100644 --- a/core/src/mysql/sql_table.rs +++ b/core/src/mysql/sql_table.rs @@ -1,9 +1,7 @@ use crate::sql::db_connection_pool::mysqlpool::MySQLConnectionPool; use crate::sql::db_connection_pool::DbConnectionPool; -use crate::sql::sql_provider_datafusion::expr::Engine; use async_trait::async_trait; use datafusion::catalog::Session; -use datafusion::common::Constraints; use datafusion::sql::unparser::dialect::MySqlDialect; use futures::TryStreamExt; use mysql_async::prelude::ToValue; @@ -43,7 +41,6 @@ impl MySQLTable { pub async fn new( pool: &Arc, table_reference: impl Into, - constraints: Option, ) -> Result { let dyn_pool = Arc::clone(pool) as Arc< @@ -51,10 +48,9 @@ impl MySQLTable { + Send + Sync, >; - let base_table = SqlTable::new("mysql", &dyn_pool, table_reference, None) + let base_table = SqlTable::new("mysql", &dyn_pool, table_reference) .await? 
- .with_dialect(Arc::new(MySqlDialect {})) - .with_constraints_opt(constraints); + .with_dialect(Arc::new(MySqlDialect {})); Ok(Self { pool: Arc::clone(pool), @@ -69,13 +65,12 @@ impl MySQLTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { + let sql = self.base_table.scan_to_sql(projections, filters, limit)?; Ok(Arc::new(MySQLSQLExec::new( projections, schema, - &self.base_table.table_reference, Arc::clone(&self.pool), - filters, - limit, + sql, )?)) } } @@ -90,10 +85,6 @@ impl TableProvider for MySQLTable { self.base_table.schema() } - fn constraints(&self) -> Option<&Constraints> { - self.base_table.constraints() - } - fn table_type(&self) -> TableType { self.base_table.table_type() } @@ -130,20 +121,10 @@ impl MySQLSQLExec { fn new( projections: Option<&Vec>, schema: &SchemaRef, - table_reference: &TableReference, pool: Arc, - filters: &[Expr], - limit: Option, + sql: String, ) -> DataFusionResult { - let base_exec = SqlExec::new( - projections, - schema, - table_reference, - pool, - filters, - limit, - Some(Engine::MySQL), - )?; + let base_exec = SqlExec::new(projections, schema, pool, sql)?; Ok(Self { base_exec }) } diff --git a/core/src/mysql/write.rs b/core/src/mysql/write.rs new file mode 100644 index 00000000..2c46eff6 --- /dev/null +++ b/core/src/mysql/write.rs @@ -0,0 +1,198 @@ +use crate::mysql::MySQL; +use crate::util::on_conflict::OnConflict; +use crate::util::retriable_error::check_and_mark_retriable_error; +use crate::util::{constraints, to_datafusion_error}; +use async_trait::async_trait; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::datasource::sink::{DataSink, DataSinkExec}; +use datafusion::{ + catalog::Session, + datasource::{TableProvider, TableType}, + execution::{SendableRecordBatchStream, TaskContext}, + logical_expr::{dml::InsertOp, Expr}, + physical_plan::{metrics::MetricsSet, DisplayAs, DisplayFormatType, ExecutionPlan}, +}; +use futures::StreamExt; +use mysql_async::TxOpts; +use snafu::ResultExt; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +#[derive(Debug, Clone)] +pub struct MySQLTableWriter { + pub read_provider: Arc, + mysql: Arc, + on_conflict: Option, +} + +impl MySQLTableWriter { + pub fn create( + read_provider: Arc, + mysql: MySQL, + on_conflict: Option, + ) -> Arc { + Arc::new(Self { + read_provider, + mysql: Arc::new(mysql), + on_conflict, + }) + } + + pub fn mysql(&self) -> Arc { + Arc::clone(&self.mysql) + } +} + +#[async_trait] +impl TableProvider for MySQLTableWriter { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.read_provider.schema() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> datafusion::common::Result> { + self.read_provider + .scan(state, projection, filters, limit) + .await + } + + async fn insert_into( + &self, + _state: &dyn Session, + input: Arc, + op: InsertOp, + ) -> datafusion::common::Result> { + Ok(Arc::new(DataSinkExec::new( + input, + Arc::new(MySQLDataSink::new( + Arc::clone(&self.mysql), + op == InsertOp::Overwrite, + self.on_conflict.clone(), + self.schema(), + )), + None, + ))) + } +} + +pub struct MySQLDataSink { + pub mysql: Arc, + pub overwrite: bool, + pub on_conflict: Option, + schema: SchemaRef, +} + +#[async_trait] +impl DataSink for MySQLDataSink { + fn as_any(&self) -> &dyn Any { + self + } + + fn metrics(&self) -> Option { + None + } + + fn schema(&self) -> &SchemaRef { + &self.schema + } 
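+    // write_all (below) drains the incoming record-batch stream inside a single MySQL
+    // transaction: when `overwrite` is set it first deletes all existing rows, then
+    // validates each non-empty batch against the table constraints and inserts it
+    // (honoring the configured `on_conflict`), commits, and returns the number of rows written.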
+ + async fn write_all( + &self, + mut data: SendableRecordBatchStream, + _context: &Arc, + ) -> datafusion::common::Result { + let mut num_rows = 0u64; + + let mut db_conn = self.mysql.connect().await.map_err(to_datafusion_error)?; + let mysql_conn = MySQL::mysql_conn(&mut db_conn).map_err(to_datafusion_error)?; + + let mut conn_guard = mysql_conn.conn.lock().await; + let mut tx = conn_guard + .start_transaction(TxOpts::default()) + .await + .context(super::UnableToBeginTransactionSnafu) + .map_err(to_datafusion_error)?; + + if self.overwrite { + self.mysql + .delete_all_table_data(&mut tx) + .await + .map_err(to_datafusion_error)?; + } + + while let Some(batch) = data.next().await { + let batch = batch.map_err(check_and_mark_retriable_error)?; + let batch_num_rows = batch.num_rows(); + + if batch_num_rows == 0 { + continue; + } + + num_rows += batch_num_rows as u64; + + constraints::validate_batch_with_constraints( + std::slice::from_ref(&batch), + self.mysql.constraints(), + ) + .await + .context(super::ConstraintViolationSnafu) + .map_err(to_datafusion_error)?; + + self.mysql + .insert_batch(&mut tx, batch, self.on_conflict.clone()) + .await + .map_err(to_datafusion_error)?; + } + + tx.commit() + .await + .context(super::UnableToCommitMySQLTransactionSnafu) + .map_err(to_datafusion_error)?; + + drop(conn_guard); + + Ok(num_rows) + } +} + +impl MySQLDataSink { + pub fn new( + mysql: Arc, + overwrite: bool, + on_conflict: Option, + schema: SchemaRef, + ) -> Self { + Self { + mysql, + overwrite, + on_conflict, + schema, + } + } +} + +impl fmt::Debug for MySQLDataSink { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "MySQLDataSink") + } +} + +impl DisplayAs for MySQLDataSink { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MySQLDataSink") + } +} diff --git a/core/src/odbc.rs b/core/src/odbc.rs new file mode 100644 index 00000000..57567552 --- /dev/null +++ b/core/src/odbc.rs @@ -0,0 +1,88 @@ +/* +Copyright 2024 The Spice.ai OSS Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use crate::sql::db_connection_pool::dbconnection::odbcconn::ODBCDbConnectionPool; +use crate::sql::{ + db_connection_pool as db_connection_pool_datafusion, sql_provider_datafusion::SqlTable, +}; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::error::DataFusionError; +use datafusion::{datasource::TableProvider, sql::TableReference}; +use snafu::prelude::*; +use std::sync::Arc; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("DbConnectionError: {source}"))] + DbConnectionError { + source: db_connection_pool_datafusion::dbconnection::GenericError, + }, + #[snafu(display("The table '{table_name}' doesn't exist in the Postgres server"))] + TableDoesntExist { table_name: String }, + + #[snafu(display("Unable to get a DB connection from the pool: {source}"))] + UnableToGetConnectionFromPool { + source: db_connection_pool_datafusion::Error, + }, + + #[snafu(display("Unable to get schema: {source}"))] + UnableToGetSchema { + source: db_connection_pool_datafusion::dbconnection::Error, + }, + + #[snafu(display("Unable to generate SQL: {source}"))] + UnableToGenerateSQL { source: DataFusionError }, +} + +type Result = std::result::Result; + +pub struct ODBCTableFactory<'a> { + pool: Arc>, +} + +impl<'a> ODBCTableFactory<'a> +where + 'a: 'static, +{ + #[must_use] + pub fn new(pool: Arc>) -> Self { + Self { pool } + } + + pub async fn table_provider( + &self, + table_reference: TableReference, + _schema: Option, + ) -> Result, Box> { + let pool = Arc::clone(&self.pool); + let dyn_pool: Arc> = pool; + + let table = SqlTable::new("odbc", &dyn_pool, table_reference) + .await + .map_err(|e| Box::new(e) as Box)?; + + let table_provider = Arc::new(table); + + #[cfg(feature = "odbc-federation")] + let table_provider = Arc::new( + table_provider + .create_federated_table_provider() + .map_err(|e| Box::new(e) as Box)?, + ); + + Ok(table_provider) + } +} diff --git a/core/src/postgres.rs b/core/src/postgres.rs index 2831e162..de089d44 100644 --- a/core/src/postgres.rs +++ b/core/src/postgres.rs @@ -7,10 +7,7 @@ use crate::sql::db_connection_pool::{ postgrespool::{self, PostgresConnectionPool}, DbConnectionPool, }; -use crate::sql::sql_provider_datafusion::{ - expr::{self, Engine}, - SqlTable, -}; +use crate::sql::sql_provider_datafusion::SqlTable; use crate::util::schema::SchemaValidator; use crate::UnsupportedTypeAction; use arrow::{ @@ -23,13 +20,14 @@ use bb8_postgres::{ PostgresConnectionManager, }; use datafusion::catalog::Session; +use datafusion::sql::unparser::dialect::PostgreSqlDialect; use datafusion::{ catalog::TableProviderFactory, common::Constraints, datasource::TableProvider, error::{DataFusionError, Result as DataFusionResult}, logical_expr::CreateExternalTable, - sql::{unparser::dialect::PostgreSqlDialect, TableReference}, + sql::TableReference, }; use postgres_native_tls::MakeTlsConnector; use snafu::prelude::*; @@ -42,6 +40,7 @@ use crate::util::{ indexes::IndexType, on_conflict::{self, OnConflict}, secrets::to_secret_map, + to_datafusion_error, }; use self::write::PostgresTableWriter; @@ -92,7 +91,7 @@ pub enum Error { }, #[snafu(display("Unable to generate SQL: {source}"))] - UnableToGenerateSQL { source: expr::Error }, + UnableToGenerateSQL { source: DataFusionError }, #[snafu(display("Unable to delete all data from the Postgres table: {source}"))] UnableToDeleteAllTableData { @@ -153,17 +152,13 @@ impl PostgresTableFactory { let dyn_pool: Arc = pool; let table_provider = Arc::new( - SqlTable::new( - "postgres", - &dyn_pool, - table_reference, - 
Some(Engine::Postgres), - ) - .await - .map_err(|e| Box::new(e) as Box)? - .with_dialect(Arc::new(PostgreSqlDialect {})), + SqlTable::new("postgres", &dyn_pool, table_reference) + .await + .map_err(|e| Box::new(e) as Box)? + .with_dialect(Arc::new(PostgreSqlDialect {})), ); + #[cfg(feature = "postgres-federation")] let table_provider = Arc::new( table_provider .create_federated_table_provider() @@ -184,7 +179,7 @@ impl PostgresTableFactory { table_reference, Arc::clone(&self.pool), schema, - Constraints::new_unverified(vec![]), + Constraints::default(), ); Ok(PostgresTableWriter::create(read_provider, postgres, None)) @@ -192,7 +187,7 @@ impl PostgresTableFactory { } #[derive(Debug)] -pub struct PostgresTableProviderFactory {} +pub struct PostgresTableProviderFactory; impl PostgresTableProviderFactory { #[must_use] @@ -313,15 +308,8 @@ impl TableProviderFactory for PostgresTableProviderFactory { let dyn_pool: Arc = pool; let read_provider = Arc::new( - SqlTable::new_with_schema( - "postgres", - &dyn_pool, - Arc::clone(&schema), - name, - Some(Engine::Postgres), - ) - .with_dialect(Arc::new(PostgreSqlDialect {})) - .with_constraints(cmd.constraints.clone()), + SqlTable::new_with_schema("postgres", &dyn_pool, Arc::clone(&schema), name) + .with_dialect(Arc::new(PostgreSqlDialect {})), ); #[cfg(feature = "postgres-federation")] @@ -335,10 +323,6 @@ impl TableProviderFactory for PostgresTableProviderFactory { } } -fn to_datafusion_error(error: Error) -> DataFusionError { - DataFusionError::External(Box::new(error)) -} - #[derive(Clone)] pub struct Postgres { table: TableReference, @@ -410,12 +394,12 @@ impl Postgres { async fn table_exists(&self, postgres_conn: &PostgresConnection) -> bool { let sql = match self.table.schema() { Some(schema) => format!( - r#"SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '{name}' AND table_schema = '{schema}')"#, + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '{name}' AND table_schema = '{schema}')", name = self.table.table(), schema = schema ), None => format!( - r#"SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '{name}')"#, + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '{name}')", name = self.table.table() ), }; @@ -455,7 +439,7 @@ impl Postgres { async fn delete_all_table_data(&self, transaction: &Transaction<'_>) -> Result<()> { transaction .execute( - format!(r#"DELETE FROM {}"#, self.table.to_quoted_string()).as_str(), + format!("DELETE FROM {}", self.table.to_quoted_string()).as_str(), &[], ) .await diff --git a/core/src/postgres/write.rs b/core/src/postgres/write.rs index c81e36a7..6ffb5bbc 100644 --- a/core/src/postgres/write.rs +++ b/core/src/postgres/write.rs @@ -18,12 +18,14 @@ use futures::StreamExt; use snafu::prelude::*; use crate::util::{ - constraints::{self, UpsertOptions}, + constraints::{self}, on_conflict::OnConflict, retriable_error::check_and_mark_retriable_error, }; -use super::{to_datafusion_error, Postgres}; +use crate::postgres::Postgres; + +use super::to_datafusion_error; #[derive(Debug, Clone)] pub struct PostgresTableWriter { @@ -84,13 +86,13 @@ impl TableProvider for PostgresTableWriter { &self, _state: &dyn Session, input: Arc, - overwrite: InsertOp, + op: InsertOp, ) -> datafusion::error::Result> { Ok(Arc::new(DataSinkExec::new( input, Arc::new(PostgresDataSink::new( Arc::clone(&self.postgres), - overwrite, + op, self.on_conflict.clone(), self.schema(), )), @@ -165,13 +167,6 @@ impl DataSink for PostgresDataSink { 
let postgres_schema = Arc::new(Schema::new(postgres_fields)); - let upsert_options = self - .on_conflict - .as_ref() - .map_or_else(UpsertOptions::default, |conflict| { - conflict.get_upsert_options() - }); - while let Some(batch) = data.next().await { let batch = batch.map_err(check_and_mark_retriable_error)?; @@ -210,21 +205,18 @@ impl DataSink for PostgresDataSink { num_rows += batch_num_rows as u64; - let batches = constraints::validate_batch_with_constraints( - vec![batch], + constraints::validate_batch_with_constraints( + &[batch.clone()], self.postgres.constraints(), - &upsert_options, ) .await .context(super::ConstraintViolationSnafu) .map_err(to_datafusion_error)?; - for batch in batches { - self.postgres - .insert_batch(&tx, batch, self.on_conflict.clone()) - .await - .map_err(to_datafusion_error)?; - } + self.postgres + .insert_batch(&tx, batch, self.on_conflict.clone()) + .await + .map_err(to_datafusion_error)?; } tx.commit() diff --git a/core/src/sql/arrow_sql_gen/arrow.rs b/core/src/sql/arrow_sql_gen/arrow.rs index f60da64e..7ecb9c24 100644 --- a/core/src/sql/arrow_sql_gen/arrow.rs +++ b/core/src/sql/arrow_sql_gen/arrow.rs @@ -1,4 +1,4 @@ -use arrow::{ +use datafusion::arrow::{ array::{ types::Int8Type, ArrayBuilder, BinaryBuilder, BooleanBuilder, Date32Builder, Date64Builder, Decimal128Builder, Decimal256Builder, FixedSizeBinaryBuilder, FixedSizeListBuilder, @@ -86,6 +86,7 @@ pub fn map_data_type_to_array_builder(data_type: &DataType) -> Box Box::new(ListBuilder::new(Int16Builder::new())), DataType::Int32 => Box::new(ListBuilder::new(Int32Builder::new())), DataType::Int64 => Box::new(ListBuilder::new(Int64Builder::new())), + DataType::UInt32 => Box::new(ListBuilder::new(UInt32Builder::new())), DataType::Float32 => Box::new(ListBuilder::new(Float32Builder::new())), DataType::Float64 => Box::new(ListBuilder::new(Float64Builder::new())), DataType::Utf8 => Box::new(ListBuilder::new(StringBuilder::new())), @@ -111,6 +112,10 @@ pub fn map_data_type_to_array_builder(data_type: &DataType) -> Box Box::new(FixedSizeListBuilder::new( + UInt32Builder::new(), + size.to_owned(), + )), DataType::Float32 => Box::new(FixedSizeListBuilder::new( Float32Builder::new(), size.to_owned(), diff --git a/core/src/sql/arrow_sql_gen/mod.rs b/core/src/sql/arrow_sql_gen/mod.rs index a315abf2..606b831a 100644 --- a/core/src/sql/arrow_sql_gen/mod.rs +++ b/core/src/sql/arrow_sql_gen/mod.rs @@ -9,7 +9,7 @@ //! ### `CREATE TABLE` statement //! ```rust //! use std::sync::Arc; -//! use arrow::datatypes::{DataType, Field, Schema}; +//! use datafusion::arrow::datatypes::{DataType, Field, Schema}; //! use datafusion_table_providers::sql::arrow_sql_gen::statement::CreateTableBuilder; //! //! let schema = Arc::new(Schema::new(vec![ @@ -26,7 +26,7 @@ //! With primary key constraints: //! ```rust //! use std::sync::Arc; -//! use arrow::datatypes::{DataType, Field, Schema}; +//! use datafusion::arrow::datatypes::{DataType, Field, Schema}; //! use datafusion_table_providers::sql::arrow_sql_gen::statement::CreateTableBuilder; //! //! 
let schema = Arc::new(Schema::new(vec![ diff --git a/core/src/sql/arrow_sql_gen/mysql.rs b/core/src/sql/arrow_sql_gen/mysql.rs index f45cde56..d982c907 100644 --- a/core/src/sql/arrow_sql_gen/mysql.rs +++ b/core/src/sql/arrow_sql_gen/mysql.rs @@ -15,11 +15,14 @@ use chrono::{NaiveDate, NaiveTime, Timelike}; use mysql_async::{consts::ColumnFlags, consts::ColumnType, FromValueError, Row, Value}; use snafu::{ResultExt, Snafu}; use std::{convert, sync::Arc}; +use time::PrimitiveDateTime; #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Failed to build record batch: {source}"))] - FailedToBuildRecordBatch { source: arrow::error::ArrowError }, + FailedToBuildRecordBatch { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("No builder found for index {index}"))] NoBuilderForIndex { index: usize }, @@ -526,7 +529,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option) -> Resu .fail(); }; let v = match handle_null_error( - row.get_opt::(i).transpose(), + row.get_opt::(i).transpose(), ) { Ok(v) => v, Err(err) => { @@ -545,7 +548,10 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option) -> Resu match v { Some(v) => { - builder.append_value(v.and_utc().timestamp_micros()); + let micros: i64 = (v.assume_utc().unix_timestamp_nanos() / 1000) + .try_into() + .unwrap(); + builder.append_value(micros); } None => builder.append_null(), } diff --git a/core/src/sql/arrow_sql_gen/postgres.rs b/core/src/sql/arrow_sql_gen/postgres.rs index 6bf76caf..ae399581 100644 --- a/core/src/sql/arrow_sql_gen/postgres.rs +++ b/core/src/sql/arrow_sql_gen/postgres.rs @@ -36,7 +36,9 @@ pub mod schema; #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Failed to build record batch: {source}"))] - FailedToBuildRecordBatch { source: arrow::error::ArrowError }, + FailedToBuildRecordBatch { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("No builder found for index {index}"))] NoBuilderForIndex { index: usize }, @@ -257,18 +259,48 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option) -> Resu Type::INT8 => { handle_primitive_type!(builder, Type::INT8, Int64Builder, i64, row, i); } + Type::OID => { + handle_primitive_type!(builder, Type::OID, UInt32Builder, u32, row, i); + } + Type::XID => { + let Some(builder) = builder else { + return NoBuilderForIndexSnafu { index: i }.fail(); + }; + let Some(builder) = builder.as_any_mut().downcast_mut::() else { + return FailedToDowncastBuilderSnafu { + postgres_type: format!("{postgres_type}"), + } + .fail(); + }; + let v = row + .try_get::>(i) + .with_context(|_| FailedToGetRowValueSnafu { pg_type: Type::XID })?; + + match v { + Some(v) => { + builder.append_value(v.xid); + } + None => builder.append_null(), + } + } Type::FLOAT4 => { handle_primitive_type!(builder, Type::FLOAT4, Float32Builder, f32, row, i); } Type::FLOAT8 => { handle_primitive_type!(builder, Type::FLOAT8, Float64Builder, f64, row, i); } + Type::CHAR => { + handle_primitive_type!(builder, Type::CHAR, Int8Builder, i8, row, i); + } Type::TEXT => { handle_primitive_type!(builder, Type::TEXT, StringBuilder, &str, row, i); } Type::VARCHAR => { handle_primitive_type!(builder, Type::VARCHAR, StringBuilder, &str, row, i); } + Type::NAME => { + handle_primitive_type!(builder, Type::NAME, StringBuilder, &str, row, i); + } Type::BYTEA => { handle_primitive_type!(builder, Type::BYTEA, BinaryBuilder, Vec, row, i); } @@ -629,6 +661,14 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option) -> Resu ListBuilder, i64 ), + Type::OID_ARRAY => 
handle_primitive_array_type!( + Type::OID_ARRAY, + builder, + row, + i, + ListBuilder, + u32 + ), Type::FLOAT4_ARRAY => handle_primitive_array_type!( Type::FLOAT4_ARRAY, builder, @@ -830,9 +870,13 @@ fn map_column_type_to_data_type(column_type: &Type, field_name: &str) -> Result< Type::INT2 => Ok(Some(DataType::Int16)), Type::INT4 => Ok(Some(DataType::Int32)), Type::INT8 | Type::MONEY => Ok(Some(DataType::Int64)), + Type::OID | Type::XID => Ok(Some(DataType::UInt32)), Type::FLOAT4 => Ok(Some(DataType::Float32)), Type::FLOAT8 => Ok(Some(DataType::Float64)), - Type::TEXT | Type::VARCHAR | Type::BPCHAR | Type::UUID => Ok(Some(DataType::Utf8)), + Type::CHAR => Ok(Some(DataType::Int8)), + Type::TEXT | Type::VARCHAR | Type::BPCHAR | Type::UUID | Type::NAME => { + Ok(Some(DataType::Utf8)) + } Type::BYTEA => Ok(Some(DataType::Binary)), Type::BOOL => Ok(Some(DataType::Boolean)), // Schema validation will only allow JSONB columns when `UnsupportedTypeAction` is set to `String`, so it is safe to handle JSONB here as strings. @@ -852,6 +896,7 @@ fn map_column_type_to_data_type(column_type: &Type, field_name: &str) -> Result< Arc::new(Field::new("item", DataType::Float64, true)), 2, ))), + Type::PG_NODE_TREE => Ok(Some(DataType::Utf8)), Type::INT2_ARRAY => Ok(Some(DataType::List(Arc::new(Field::new( "item", DataType::Int16, @@ -867,6 +912,11 @@ fn map_column_type_to_data_type(column_type: &Type, field_name: &str) -> Result< DataType::Int64, true, ))))), + Type::OID_ARRAY => Ok(Some(DataType::List(Arc::new(Field::new( + "item", + DataType::UInt32, + true, + ))))), Type::FLOAT4_ARRAY => Ok(Some(DataType::List(Arc::new(Field::new( "item", DataType::Float32, @@ -1031,6 +1081,25 @@ impl<'a> FromSql<'a> for GeometryFromSql<'a> { } } +struct XidFromSql { + xid: u32, +} + +impl<'a> FromSql<'a> for XidFromSql { + fn from_sql( + _ty: &Type, + raw: &'a [u8], + ) -> Result> { + let mut cursor = std::io::Cursor::new(raw); + let xid = cursor.read_u32::()?; + Ok(XidFromSql { xid }) + } + + fn accepts(ty: &Type) -> bool { + matches!(*ty, Type::XID) + } +} + fn get_decimal_column_precision_and_scale( column_name: &str, projected_schema: &SchemaRef, @@ -1045,8 +1114,8 @@ fn get_decimal_column_precision_and_scale( #[cfg(test)] mod tests { use super::*; - use arrow::array::{Time64NanosecondArray, Time64NanosecondBuilder}; use chrono::NaiveTime; + use datafusion::arrow::array::{Time64NanosecondArray, Time64NanosecondBuilder}; use geo_types::{point, polygon, Geometry}; use geozero::{CoordDimensions, ToWkb}; use std::str::FromStr; diff --git a/core/src/sql/arrow_sql_gen/postgres/builder.rs b/core/src/sql/arrow_sql_gen/postgres/builder.rs index 2ff35495..6b9e6e84 100644 --- a/core/src/sql/arrow_sql_gen/postgres/builder.rs +++ b/core/src/sql/arrow_sql_gen/postgres/builder.rs @@ -1,4 +1,4 @@ -use arrow::datatypes::Fields; +use datafusion::arrow::datatypes::Fields; use sea_query::{Alias, ColumnDef, PostgresQueryBuilder, TableBuilder}; use crate::sql::arrow_sql_gen::statement::map_data_type_to_column_type; @@ -80,7 +80,7 @@ fn fields_to_simple_column_defs(fields: &Fields) -> Vec { #[cfg(test)] mod tests { - use arrow::datatypes::{DataType, Field, Schema}; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; use super::*; diff --git a/core/src/sql/arrow_sql_gen/postgres/schema.rs b/core/src/sql/arrow_sql_gen/postgres/schema.rs index 4539cb99..9037bb4f 100644 --- a/core/src/sql/arrow_sql_gen/postgres/schema.rs +++ b/core/src/sql/arrow_sql_gen/postgres/schema.rs @@ -50,15 +50,16 @@ pub(crate) fn pg_data_type_to_arrow_type( 
"smallint" => Ok(DataType::Int16), "integer" | "int" | "int4" => Ok(DataType::Int32), "bigint" | "int8" | "money" => Ok(DataType::Int64), + "oid" | "xid" | "regproc" => Ok(DataType::UInt32), "numeric" | "decimal" => { let (precision, scale) = parse_numeric_type(pg_type)?; Ok(DataType::Decimal128(precision, scale)) } "real" | "float4" => Ok(DataType::Float32), "double precision" | "float8" => Ok(DataType::Float64), - "character" | "char" | "character varying" | "varchar" | "text" | "bpchar" | "uuid" => { - Ok(DataType::Utf8) - } + "\"char\"" => Ok(DataType::Int8), + "character" | "char" | "character varying" | "varchar" | "text" | "bpchar" | "uuid" + | "name" => Ok(DataType::Utf8), "bytea" => Ok(DataType::Binary), "date" => Ok(DataType::Date32), "time" | "time without time zone" => Ok(DataType::Time64(TimeUnit::Nanosecond)), @@ -79,10 +80,24 @@ pub(crate) fn pg_data_type_to_arrow_type( Arc::new(Field::new("item", DataType::Float64, true)), 2, )), + "line" | "lseg" | "box" | "path" | "polygon" | "circle" => Ok(DataType::Binary), + "inet" | "cidr" | "macaddr" => Ok(DataType::Utf8), + "bit" | "bit varying" => Ok(DataType::Binary), + "tsvector" | "tsquery" => Ok(DataType::LargeUtf8), "xml" | "json" => Ok(DataType::Utf8), // `Name` is a 64 bytes (varchar) / internal type for object names - "\"Name\"" => Ok(DataType::Utf8), + "\"Name\"" => Ok(DataType::Utf8), + "aclitem" | "pg_node_tree" => Ok(DataType::Utf8), "array" => parse_array_type(context), + "anyarray" => Ok(DataType::List(Arc::new(Field::new( + "item", + DataType::Binary, + true, + )))), + "int4range" => Ok(DataType::Struct(Fields::from(vec![ + Field::new("lower", DataType::Int32, true), + Field::new("upper", DataType::Int32, true), + ]))), "composite" => parse_composite_type(context), "geometry" | "geography" => Ok(DataType::Binary), @@ -250,6 +265,11 @@ mod tests { pg_data_type_to_arrow_type("boolean", &context).expect("Failed to convert boolean"), DataType::Boolean ); + assert_eq!( + pg_data_type_to_arrow_type("\"char\"", &context) + .expect("Failed to convert single character"), + DataType::Int8 + ); // Test string types assert_eq!( @@ -261,6 +281,10 @@ mod tests { .expect("Failed to convert character varying"), DataType::Utf8 ); + assert_eq!( + pg_data_type_to_arrow_type("name", &context).expect("Failed to convert name"), + DataType::Utf8 + ); assert_eq!( pg_data_type_to_arrow_type("text", &context).expect("Failed to convert text"), DataType::Utf8 @@ -392,6 +416,15 @@ mod tests { .expect("Failed to convert character varying(255)"), DataType::Utf8 ); + assert_eq!( + pg_data_type_to_arrow_type("bit(8)", &context).expect("Failed to convert bit(8)"), + DataType::Binary + ); + assert_eq!( + pg_data_type_to_arrow_type("bit varying(64)", &context) + .expect("Failed to convert bit varying(64)"), + DataType::Binary + ); assert_eq!( pg_data_type_to_arrow_type("numeric(10,2)", &context) .expect("Failed to convert numeric(10,2)"), @@ -446,18 +479,65 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)) ); + // Test geometric types + assert_eq!( + pg_data_type_to_arrow_type("point", &context).expect("Failed to convert point"), + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, true)), 2) + ); + assert_eq!( + pg_data_type_to_arrow_type("line", &context).expect("Failed to convert line"), + DataType::Binary + ); + + // Test network address types + assert_eq!( + pg_data_type_to_arrow_type("inet", &context).expect("Failed to convert inet"), + DataType::Utf8 + ); + assert_eq!( + 
pg_data_type_to_arrow_type("cidr", &context).expect("Failed to convert cidr"), + DataType::Utf8 + ); + + // Test range types + assert_eq!( + pg_data_type_to_arrow_type("int4range", &context).expect("Failed to convert int4range"), + DataType::Struct(Fields::from(vec![ + Field::new("lower", DataType::Int32, true), + Field::new("upper", DataType::Int32, true), + ])) + ); + // Test JSON types assert_eq!( pg_data_type_to_arrow_type("json", &context).expect("Failed to convert json"), DataType::Utf8 ); + let jsonb_context = context + .clone() + .with_unsupported_type_action(UnsupportedTypeAction::String); + assert_eq!( + pg_data_type_to_arrow_type("jsonb", &jsonb_context).expect("Failed to convert jsonb"), + DataType::Utf8 + ); + // Test UUID type assert_eq!( pg_data_type_to_arrow_type("uuid", &context).expect("Failed to convert uuid"), DataType::Utf8 ); + // Test text search types + assert_eq!( + pg_data_type_to_arrow_type("tsvector", &context).expect("Failed to convert tsvector"), + DataType::LargeUtf8 + ); + assert_eq!( + pg_data_type_to_arrow_type("tsquery", &context).expect("Failed to convert tsquery"), + DataType::LargeUtf8 + ); + // Test bpchar type assert_eq!( pg_data_type_to_arrow_type("bpchar", &context).expect("Failed to convert bpchar"), diff --git a/core/src/sql/arrow_sql_gen/sqlite.rs b/core/src/sql/arrow_sql_gen/sqlite.rs index 150ad308..aeb71e65 100644 --- a/core/src/sql/arrow_sql_gen/sqlite.rs +++ b/core/src/sql/arrow_sql_gen/sqlite.rs @@ -17,38 +17,24 @@ limitations under the License. use std::sync::Arc; use crate::sql::arrow_sql_gen::arrow::map_data_type_to_array_builder; -use arrow::array::ArrayBuilder; -use arrow::array::ArrayRef; -use arrow::array::BinaryBuilder; -use arrow::array::BooleanBuilder; -use arrow::array::Float32Builder; -use arrow::array::Float64Builder; -use arrow::array::Int16Builder; -use arrow::array::Int32Builder; -use arrow::array::Int64Builder; -use arrow::array::Int8Builder; -use arrow::array::LargeStringBuilder; -use arrow::array::NullBuilder; -use arrow::array::RecordBatch; -use arrow::array::RecordBatchOptions; -use arrow::array::StringBuilder; -use arrow::array::UInt16Builder; -use arrow::array::UInt32Builder; -use arrow::array::UInt64Builder; -use arrow::array::UInt8Builder; -use arrow::datatypes::DataType; -use arrow::datatypes::Field; -use arrow::datatypes::Schema; -use arrow::datatypes::SchemaRef; -use rusqlite::types::Type; -use rusqlite::Row; -use rusqlite::Rows; +use arrow::{ + array::{ + ArrayBuilder, ArrayRef, BinaryBuilder, BooleanBuilder, Float32Builder, Float64Builder, + Int16Builder, Int32Builder, Int64Builder, Int8Builder, LargeStringBuilder, NullBuilder, + RecordBatch, RecordBatchOptions, StringBuilder, UInt16Builder, UInt32Builder, + UInt64Builder, UInt8Builder, + }, + datatypes::{DataType, Field, Schema, SchemaRef}, +}; +use rusqlite::{types::Type, Row, Rows}; use snafu::prelude::*; #[derive(Debug, Snafu)] pub enum Error { #[snafu(display("Failed to build record batch: {source}"))] - FailedToBuildRecordBatch { source: arrow::error::ArrowError }, + FailedToBuildRecordBatch { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("No builder found for index {index}"))] NoBuilderForIndex { index: usize }, @@ -83,7 +69,7 @@ pub fn rows_to_arrow( if let Ok(Some(row)) = rows.next() { for i in 0..num_cols { - let column_type = row + let mut column_type = row .get_ref(i) .context(FailedToExtractRowValueSnafu)? .data_type(); @@ -93,6 +79,32 @@ pub fn rows_to_arrow( .context(FailedToExtractColumnNameSnafu)? 
.to_string(); + // SQLite can store floating point values without a fractional component as integers. + // Therefore, we need to verify if the column is actually a floating point type + // by examining the projected schema. + // Note: The same column may contain both integer and floating point values. + // Reading values as Float is safe even if the value is stored as an integer. + // Refer to the rusqlite type handling documentation for more details: + // https://github.com/rusqlite/rusqlite/blob/95680270eca6f405fb51f5fbe6a214aac5fdce58/src/types/mod.rs#L21C1-L22C75 + // + // `REAL` to integer: always returns an [`Error::InvalidColumnType`](crate::Error::InvalidColumnType) error. + // `INTEGER` to float: casts using `as` operator. Never fails. + // `REAL` to float: casts using `as` operator. Never fails. + + if column_type == Type::Integer { + if let Some(projected_schema) = projected_schema.as_ref() { + match projected_schema.fields[i].data_type() { + DataType::Decimal128(..) + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 => { + column_type = Type::Real; + } + _ => {} + } + } + } + let data_type = match &projected_schema { Some(schema) => { to_sqlite_decoding_type(schema.fields()[i].data_type(), &column_type) diff --git a/core/src/sql/arrow_sql_gen/statement.rs b/core/src/sql/arrow_sql_gen/statement.rs index b86808e5..06b1d332 100644 --- a/core/src/sql/arrow_sql_gen/statement.rs +++ b/core/src/sql/arrow_sql_gen/statement.rs @@ -1,4 +1,6 @@ -use arrow::{ +use bigdecimal::BigDecimal; +use chrono::{DateTime, Offset, TimeZone}; +use datafusion::arrow::{ array::{ array, timezone::Tz, Array, ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, RecordBatch, StringArray, @@ -7,18 +9,14 @@ use arrow::{ datatypes::{DataType, Field, Fields, IntervalUnit, Schema, SchemaRef, TimeUnit}, util::display::array_value_to_string, }; -use bigdecimal::BigDecimal; -use chrono::{DateTime, Offset, TimeZone}; use datafusion::sql::TableReference; use num_bigint::BigInt; use sea_query::{ Alias, ColumnDef, ColumnType, Expr, GenericBuilder, Index, InsertStatement, IntoIden, IntoIndexColumn, Keyword, MysqlQueryBuilder, OnConflict, PostgresQueryBuilder, Query, - QueryBuilder, SeaRc, SimpleExpr, SqliteQueryBuilder, StringLen, Table, TableRef, + QueryBuilder, SeaRc, SimpleExpr, SqliteQueryBuilder, Table, TableRef, }; use snafu::Snafu; -#[cfg(feature = "sqlite")] -use std::any::Any; use std::{str::FromStr, sync::Arc}; use time::{OffsetDateTime, PrimitiveDateTime}; @@ -39,6 +37,7 @@ pub struct CreateTableBuilder { schema: SchemaRef, table_name: String, primary_keys: Vec, + temporary: bool, } impl CreateTableBuilder { @@ -48,6 +47,7 @@ impl CreateTableBuilder { schema, table_name: table_name.to_string(), primary_keys: Vec::new(), + temporary: false, } } @@ -60,6 +60,13 @@ impl CreateTableBuilder { self } + #[must_use] + /// Set whether the table is temporary or not. 
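+    /// When enabled, the builders emit `CREATE TEMPORARY TABLE ...` instead of a plain
+    /// `CREATE TABLE` (see `test_temporary_table_creation` below for the SQLite output).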
+ pub fn temporary(mut self, temporary: bool) -> Self { + self.temporary = temporary; + self + } + #[must_use] #[cfg(feature = "postgres")] pub fn build_postgres(self) -> Vec { @@ -108,6 +115,11 @@ impl CreateTableBuilder { #[must_use] pub fn build_mysql(self) -> String { self.build(MysqlQueryBuilder, &|f: &Arc| -> ColumnType { + // MySQL does not natively support Arrays, Structs, etc + // so we use JSON column type for List, FixedSizeList, LargeList, Struct, etc + if f.data_type().is_nested() { + return ColumnType::JsonBinary; + } map_data_type_to_column_type(f.data_type()) }) } @@ -142,6 +154,10 @@ impl CreateTableBuilder { create_stmt.primary_key(&mut index); } + if self.temporary { + create_stmt.temporary(); + } + create_stmt.to_string(query_builder) } } @@ -180,15 +196,24 @@ pub struct InsertBuilder { } pub fn use_json_insert_for_type( - _data_type: &DataType, + data_type: &DataType, #[allow(unused_variables)] query_builder: &T, ) -> bool { #[cfg(feature = "sqlite")] - if (query_builder as &dyn Any) - .downcast_ref::() - .is_some() { - return _data_type.is_nested(); + use std::any::Any; + let any_builder = query_builder as &dyn Any; + if any_builder.is::() { + return data_type.is_nested(); + } + } + #[cfg(feature = "mysql")] + { + use std::any::Any; + let any_builder = query_builder as &dyn Any; + if any_builder.is::() { + return data_type.is_nested(); + } } false } @@ -1310,9 +1335,11 @@ pub(crate) fn map_data_type_to_column_type(data_type: &DataType) -> ColumnType { | DataType::FixedSizeList(list_type, _) => { ColumnType::Array(map_data_type_to_column_type(list_type.data_type()).into()) } - DataType::Binary | DataType::LargeBinary | DataType::BinaryView => { - ColumnType::VarBinary(StringLen::Max) - } + // Originally mapped to VarBinary type, corresponding to MySQL's varbinary, which has a maximum length of 65535. + // This caused the error: "Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. + // This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs." + // Changing to Blob fixes this issue. This change does not affect Postgres, and for Sqlite, the mapping type changes from varbinary_blob to blob. 
+ DataType::Binary | DataType::LargeBinary | DataType::BinaryView => ColumnType::Blob, DataType::FixedSizeBinary(num_bytes) => ColumnType::Binary(num_bytes.to_owned() as u32), DataType::Interval(_) => ColumnType::Interval(None, None), // Add more mappings here as needed @@ -1391,7 +1418,7 @@ fn insert_struct_into_row_values_json( source: Box::new(e), })?; - let mut writer = arrow_json::LineDelimitedWriter::new(Vec::new()); + let mut writer = datafusion::arrow::json::LineDelimitedWriter::new(Vec::new()); writer .write(&batch) .map_err(|e| Error::FailedToCreateInsertStatement { @@ -1419,7 +1446,7 @@ mod tests { use std::sync::Arc; use super::*; - use arrow::datatypes::{DataType, Field, Int32Type, Schema}; + use datafusion::arrow::datatypes::{DataType, Field, Int32Type, Schema}; #[test] fn test_basic_table_creation() { @@ -1536,6 +1563,20 @@ mod tests { assert_eq!(sql, "CREATE TABLE IF NOT EXISTS \"users\" ( \"id\" integer NOT NULL, \"id2\" integer NOT NULL, \"name\" text NOT NULL, \"age\" integer, PRIMARY KEY (\"id\", \"id2\") )"); } + #[test] + fn test_temporary_table_creation() { + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ]); + let sql = CreateTableBuilder::new(SchemaRef::new(schema), "users") + .primary_keys(vec!["id"]) + .temporary(true) + .build_sqlite(); + + assert_eq!(sql, "CREATE TEMPORARY TABLE IF NOT EXISTS \"users\" ( \"id\" integer NOT NULL, \"name\" text NOT NULL, PRIMARY KEY (\"id\") )"); + } + #[test] fn test_table_insertion_with_list() { let schema1 = Schema::new(vec![Field::new( diff --git a/core/src/sql/db_connection_pool/clickhousepool.rs b/core/src/sql/db_connection_pool/clickhousepool.rs new file mode 100644 index 00000000..018b9f9e --- /dev/null +++ b/core/src/sql/db_connection_pool/clickhousepool.rs @@ -0,0 +1,124 @@ +use std::collections::HashMap; + +use clickhouse::{Client, Compression}; +use secrecy::{ExposeSecret, SecretString}; +use snafu::{ResultExt, Snafu}; + +use super::{dbconnection::DbConnection, DbConnectionPool, JoinPushDown}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("ClickHouse connection failed. {source}"))] + ConnectionError { source: clickhouse::error::Error }, + + #[snafu(display("Invalid connection string for ClickHouse. {source}"))] + InvalidConnectionString { source: url::ParseError }, + + #[snafu(display("Invalid value for parameter {parameter_name}. 
Ensure the value is valid for parameter {parameter_name}"))] + InvalidParameterError { parameter_name: String }, +} + +#[derive(Clone)] +pub struct ClickHouseConnectionPool { + pub client: Client, + pub join_push_down: JoinPushDown, +} + +impl std::fmt::Debug for ClickHouseConnectionPool { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ClickHouseConnectionPool") + .field("join_push_down", &self.join_push_down) + .finish() + } +} + +impl ClickHouseConnectionPool { + pub async fn new(params: HashMap) -> Result { + let mut client = Client::default(); + let mut url = None; + let mut database = None; + + for (key, value) in ¶ms { + let value = value.expose_secret(); + match key.as_str() { + "url" => { + client = client.with_url(value); + url = Some(value) + } + "database" => { + client = client.with_database(value); + database = Some(value) + } + "user" => { + client = client.with_user(value); + } + "password" => { + client = client.with_password(value); + } + "access_token" => { + client = client.with_access_token(value); + } + "compression" => { + client = match value.to_lowercase().as_str() { + "lz4" => client.with_compression(Compression::Lz4), + "none" => client.with_compression(Compression::None), + other => { + return Err(Error::InvalidParameterError { + parameter_name: format!("compression = {}", other), + }); + } + }; + } + key if key.starts_with("option_") => { + let opt = &key["option_".len()..]; + client = client.with_option(opt, value); + } + key if key.starts_with("header_") => { + let header = &key["header_".len()..]; + client = client.with_header(header, value); + } + key if key.starts_with("product_") => { + let name = &key["product_".len()..]; + client = client.with_product_info(name, value); + } + _ => { + // Unknown keys are ignored silently or optionally warn + } + } + } + + client + .query("SELECT 1") + .fetch_all::() + .await + .context(ConnectionSnafu)?; + + let join_push_down = { + let mut ctx = format!("url={}", url.unwrap_or("default")); + if let Some(db) = database { + ctx.push_str(&format!(",db={}", db)); + } + JoinPushDown::AllowedFor(ctx) + }; + + Ok(Self { + client, + join_push_down, + }) + } + + pub fn client(&self) -> Client { + self.client.clone() + } +} + +#[async_trait::async_trait] +impl DbConnectionPool for ClickHouseConnectionPool { + async fn connect(&self) -> super::Result>> { + Ok(Box::new(self.client())) + } + + fn join_push_down(&self) -> JoinPushDown { + self.join_push_down.clone() + } +} diff --git a/core/src/sql/db_connection_pool/dbconnection.rs b/core/src/sql/db_connection_pool/dbconnection.rs index 8b0f807e..b1867a3d 100644 --- a/core/src/sql/db_connection_pool/dbconnection.rs +++ b/core/src/sql/db_connection_pool/dbconnection.rs @@ -5,10 +5,14 @@ use datafusion::{ }; use snafu::prelude::*; +#[cfg(feature = "clickhouse")] +pub mod clickhouseconn; #[cfg(feature = "duckdb")] pub mod duckdbconn; #[cfg(feature = "mysql")] pub mod mysqlconn; +#[cfg(feature = "odbc")] +pub mod odbcconn; #[cfg(feature = "postgres")] pub mod postgresconn; #[cfg(feature = "sqlite")] @@ -41,6 +45,12 @@ pub enum Error { table_name: String, source: GenericError, }, + + #[snafu(display("Unable to get schemas: {source}"))] + UnableToGetSchemas { source: GenericError }, + + #[snafu(display("Unable to get tables: {source}"))] + UnableToGetTables { source: GenericError }, } pub trait SyncDbConnection: DbConnection { @@ -48,6 +58,10 @@ pub trait SyncDbConnection: DbConnection { where Self: Sized; + fn tables(&self, schema: &str) -> 
Result, Error>; + + fn schemas(&self) -> Result, Error>; + /// Get the schema for a table reference. /// /// # Arguments @@ -95,13 +109,42 @@ pub trait AsyncDbConnection: DbConnection + Sync { fn new(conn: T) -> Self where Self: Sized; + + async fn tables(&self, schema: &str) -> Result, Error>; + + async fn schemas(&self) -> Result, Error>; + + /// Get the schema for a table reference. + /// + /// # Arguments + /// + /// * `table_reference` - The table reference. async fn get_schema(&self, table_reference: &TableReference) -> Result; + + /// Query the database with the given SQL statement and parameters, returning a `Result` of `SendableRecordBatchStream`. + /// + /// # Arguments + /// + /// * `sql` - The SQL statement. + /// * `params` - The parameters for the SQL statement. + /// * `projected_schema` - The Projected schema for the query. + /// + /// # Errors + /// + /// Returns an error if the query fails. async fn query_arrow( &self, sql: &str, params: &[P], projected_schema: Option, ) -> Result; + + /// Execute the given SQL statement with parameters, returning the number of affected rows. + /// + /// # Arguments + /// + /// * `sql` - The SQL statement. + /// * `params` - The parameters for the SQL statement. async fn execute(&self, sql: &str, params: &[P]) -> Result; } @@ -117,6 +160,36 @@ pub trait DbConnection: Send { } } +pub async fn get_tables( + conn: Box>, + schema: &str, +) -> Result, Error> { + let schema = if let Some(conn) = conn.as_sync() { + conn.tables(schema)? + } else if let Some(conn) = conn.as_async() { + conn.tables(schema).await? + } else { + return Err(Error::UnableToDowncastConnection {}); + }; + Ok(schema) +} + +/// Get the schemas for the database. +/// +/// # Errors +/// +/// Returns an error if the schemas cannot be retrieved. +pub async fn get_schemas(conn: Box>) -> Result, Error> { + let schema = if let Some(conn) = conn.as_sync() { + conn.schemas()? + } else if let Some(conn) = conn.as_async() { + conn.schemas().await? + } else { + return Err(Error::UnableToDowncastConnection {}); + }; + Ok(schema) +} + /// Get the schema for a table reference. /// /// # Arguments @@ -130,7 +203,7 @@ pub trait DbConnection: Send { pub async fn get_schema( conn: Box>, table_reference: &datafusion::sql::TableReference, -) -> Result, Error> { +) -> Result, Error> { let schema = if let Some(conn) = conn.as_sync() { conn.get_schema(table_reference)? 
} else if let Some(conn) = conn.as_async() { diff --git a/core/src/sql/db_connection_pool/dbconnection/clickhouseconn.rs b/core/src/sql/db_connection_pool/dbconnection/clickhouseconn.rs new file mode 100644 index 00000000..81d847d6 --- /dev/null +++ b/core/src/sql/db_connection_pool/dbconnection/clickhouseconn.rs @@ -0,0 +1,256 @@ +use std::io::Cursor; +use std::{any::Any, sync::Arc}; + +use arrow::array::RecordBatch; +use arrow_ipc::reader::{StreamDecoder, StreamReader}; +use async_trait::async_trait; +use clickhouse::{Client, Row}; +use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::EmptyRecordBatchStream; +use datafusion::{execution::SendableRecordBatchStream, sql::TableReference}; +use regex::Regex; +use serde::Deserialize; +use snafu::ResultExt; + +use super::{AsyncDbConnection, DbConnection, Error, SyncDbConnection}; + +impl DbConnection for Client { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn as_sync(&self) -> Option<&dyn SyncDbConnection> { + None + } + + fn as_async(&self) -> Option<&dyn AsyncDbConnection> { + Some(self) + } +} + +#[async_trait] +impl AsyncDbConnection for Client { + fn new(conn: Client) -> Self + where + Self: Sized, + { + conn + } + + async fn tables(&self, schema: &str) -> Result, Error> { + #[derive(Row, Deserialize)] + struct Row { + name: String, + } + + let tables: Vec = self + .query("SELECT name FROM system.tables WHERE database = ?") + .bind(schema) + .fetch_all() + .await + .boxed() + .context(super::UnableToGetTablesSnafu)?; + + Ok(tables.into_iter().map(|x| x.name).collect()) + } + + async fn schemas(&self) -> Result, Error> { + #[derive(Row, Deserialize)] + struct Row { + name: String, + } + let tables: Vec = self + .query("SELECT name FROM system.databases WHERE name NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')") + .fetch_all() + .await + .boxed() + .context(super::UnableToGetSchemasSnafu)?; + + Ok(tables.into_iter().map(|x| x.name).collect()) + } + + /// Get the schema for a table reference. + /// + /// # Arguments + /// + /// * `table_reference` - The table reference. + async fn get_schema(&self, table_reference: &TableReference) -> Result { + #[derive(Row, Deserialize)] + struct CatalogRow { + db: String, + } + + let database = match table_reference.schema() { + Some(db) => db.to_string(), + None => { + let row: CatalogRow = self + .query("SELECT currentDatabase() AS db") + .fetch_one() + .await + .boxed() + .context(super::UnableToGetSchemaSnafu)?; + row.db + } + }; + + #[derive(Row, Deserialize)] + struct TableInfoRow { + engine: String, + as_select: String, + } + + let table_info: TableInfoRow = self + .query("SELECT engine, as_select FROM system.tables WHERE database = ? 
AND name = ?") + .bind(&database) + .bind(table_reference.table()) + .fetch_one() + .await + .boxed() + .context(super::UnableToGetSchemaSnafu)?; + + let is_view = matches!( + table_info.engine.to_uppercase().as_str(), + "VIEW" | "MATERIALIZEDVIEW" + ); + + let statement = if is_view { + let view_query = table_info.as_select; + format!( + "SELECT * FROM ({}) LIMIT 0", + replace_clickhouse_ddl_parameters(&view_query) + ) + } else { + let table_ref = TableReference::partial(database, table_reference.table()); + format!("SELECT * FROM {} LIMIT 0", table_ref.to_quoted_string()) + }; + + let mut bytes = self + .query(&statement) + .fetch_bytes("ArrowStream") + .boxed() + .context(super::UnableToGetSchemaSnafu)?; + + let reader = bytes + .collect() + .await + .boxed() + .and_then(|bytes| StreamReader::try_new(Cursor::new(bytes), None).boxed()) + .context(super::UnableToGetSchemaSnafu)?; + + return Ok(reader.schema()); + } + + /// Query the database with the given SQL statement and parameters, returning a `Result` of `SendableRecordBatchStream`. + /// + /// # Arguments + /// + /// * `sql` - The SQL statement. + /// * `params` - The parameters for the SQL statement. + /// * `projected_schema` - The Projected schema for the query. + /// + /// # Errors + /// + /// Returns an error if the query fails. + async fn query_arrow( + &self, + sql: &str, + _params: &[()], + projected_schema: Option, + ) -> super::Result { + let query = self.query(sql); + + let mut bytes_stream = query + .fetch_bytes("ArrowStream") + .boxed() + .context(super::UnableToQueryArrowSnafu)?; + + let mut first_batch: Option = None; + let mut decoder = StreamDecoder::new(); + + // fetch till first set of records + while let Some(buf) = bytes_stream.next().await? { + if let Some(batch) = decoder.decode(&mut buf.into())? { + first_batch = Some(batch); + break; + } + } + + if let Some(first_batch) = first_batch { + let schema = first_batch.schema(); + let stream = async_stream::stream! { + yield Ok(first_batch); + while let Some(buf) = bytes_stream + .next() + .await + .map_err(|er| arrow::error::ArrowError::ExternalError(Box::new(er)))? + { + if let Some(batch) = decoder.decode(&mut buf.into())? { + yield Ok(batch); + } + } + }; + Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) + } else if let Some(schema) = projected_schema { + Ok(Box::pin(RecordBatchStreamAdapter::new( + schema.clone(), + EmptyRecordBatchStream::new(schema), + ))) + } else { + let schema: Arc = Schema::empty().into(); + Ok(Box::pin(RecordBatchStreamAdapter::new( + schema.clone(), + EmptyRecordBatchStream::new(schema), + ))) + } + } + + /// Execute the given SQL statement with parameters, returning the number of affected rows. + /// + /// # Arguments + /// + /// * `sql` - The SQL statement. + /// * `params` - The parameters for the SQL statement. 
+    async fn execute(&self, sql: &str, params: &[()]) -> super::Result<u64> {
+        let mut query = self.query(sql);
+
+        for param in params {
+            query = query.bind(param);
+        }
+
+        query
+            .execute()
+            .await
+            .boxed()
+            .context(super::UnableToQueryArrowSnafu)?;
+
+        Ok(0)
+    }
+}
+
+pub fn replace_clickhouse_ddl_parameters(ddl_query: &str) -> String {
+    // Regex to find parameters in the format {parameter_name:DataType}
+    let param_pattern = Regex::new(r"\{(\w+?):(\w+?)\}").unwrap();
+
+    let modified_query = param_pattern.replace_all(ddl_query, |caps: &regex::Captures| {
+        // match against the datatype
+        let data_type = caps.get(2).map_or("", |m| m.as_str());
+        match data_type.to_lowercase().as_str() {
+            "string" => "''".to_string(),
+            "uint8" | "uint16" | "uint32" | "uint64" | "int8" | "int16" | "int32" | "int64" => {
+                "0".to_string()
+            }
+            "float32" | "float64" => "0.0".to_string(),
+            "date" => "'2000-01-01'".to_string(),
+            "datetime" => "'2000-01-01 00:00:00'".to_string(),
+            "bool" => "false".to_string(),
+            _ => "''".to_string(),
+        }
+    });
+
+    modified_query.into_owned()
+}
diff --git a/core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs b/core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs
index 32c6ac1d..6900eee6 100644
--- a/core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs
+++ b/core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs
@@ -17,10 +17,11 @@ use duckdb::vtab::to_duckdb_type_id;
 use duckdb::ToSql;
 use duckdb::{Connection, DuckdbConnectionManager};
 use dyn_clone::DynClone;
-use rand::distributions::{Alphanumeric, DistString};
+use rand::distr::{Alphanumeric, SampleString};
 use snafu::{prelude::*, ResultExt};
 use tokio::sync::mpsc::Sender;
 
+use crate::sql::db_connection_pool::runtime::run_sync_with_tokio;
 use crate::util::schema::SchemaValidator;
 use crate::UnsupportedTypeAction;
 
@@ -73,7 +74,7 @@ impl DuckDBAttachments {
     /// Creates a new instance of a `DuckDBAttachments`, which instructs DuckDB connections to attach other DuckDB databases for queries.
     #[must_use]
     pub fn new(main_db: &str, attachments: &[Arc<str>]) -> Self {
-        let random_id = Alphanumeric.sample_string(&mut rand::thread_rng(), 8);
+        let random_id = Alphanumeric.sample_string(&mut rand::rng(), 8);
         let attachments: HashSet<Arc<str>> = attachments.iter().cloned().collect();
         Self {
             attachments,
@@ -349,6 +350,54 @@ impl SyncDbConnection<r2d2::PooledConnection<DuckdbConnectionManager>, DuckDBPar
         }
     }
 
+    fn tables(&self, schema: &str) -> Result<Vec<String>, super::Error> {
+        let sql = "SELECT table_name FROM information_schema.tables \
+            WHERE table_schema = ? AND table_type = 'BASE TABLE'";
+
+        let mut stmt = self
+            .conn
+            .prepare(sql)
+            .boxed()
+            .context(super::UnableToGetTablesSnafu)?;
+        let mut rows = stmt
+            .query([schema])
+            .boxed()
+            .context(super::UnableToGetTablesSnafu)?;
+        let mut tables = vec![];
+
+        while let Some(row) = rows.next().boxed().context(super::UnableToGetTablesSnafu)? {
+            tables.push(row.get(0).boxed().context(super::UnableToGetTablesSnafu)?);
+        }
+
+        Ok(tables)
+    }
+
+    fn schemas(&self) -> Result<Vec<String>, super::Error> {
+        let sql = "SELECT DISTINCT schema_name FROM information_schema.schemata \
+            WHERE schema_name NOT IN ('information_schema', 'pg_catalog')";
+
+        let mut stmt = self
+            .conn
+            .prepare(sql)
+            .boxed()
+            .context(super::UnableToGetSchemasSnafu)?;
+        let mut rows = stmt
+            .query([])
+            .boxed()
+            .context(super::UnableToGetSchemasSnafu)?;
+        let mut schemas = vec![];
+
+        while let Some(row) = rows
+            .next()
+            .boxed()
+            .context(super::UnableToGetSchemasSnafu)?
+ { + schemas.push(row.get(0).boxed().context(super::UnableToGetSchemasSnafu)?); + } + + Ok(schemas) + } + fn get_schema(&self, table_reference: &TableReference) -> Result { let table_str = if is_table_function(table_reference) { table_reference.to_string() @@ -401,45 +450,49 @@ impl SyncDbConnection, DuckDBPar let cloned_schema = schema.clone(); - let join_handle = tokio::task::spawn_blocking(move || { - let mut stmt = conn.prepare(&sql).context(DuckDBQuerySnafu)?; - let params: &[&dyn ToSql] = ¶ms - .iter() - .map(|f| f.as_input_parameter()) - .collect::>(); - let result: duckdb::ArrowStream<'_> = stmt - .stream_arrow(params, cloned_schema) - .context(DuckDBQuerySnafu)?; - for i in result { - blocking_channel_send(&batch_tx, i)?; - } - Ok::<_, Box>(()) - }); + let create_stream = || -> Result { + let join_handle = tokio::task::spawn_blocking(move || { + let mut stmt = conn.prepare(&sql).context(DuckDBQuerySnafu)?; + let params: &[&dyn ToSql] = ¶ms + .iter() + .map(|f| f.as_input_parameter()) + .collect::>(); + let result: duckdb::ArrowStream<'_> = stmt + .stream_arrow(params, cloned_schema) + .context(DuckDBQuerySnafu)?; + for i in result { + blocking_channel_send(&batch_tx, i)?; + } + Ok::<_, Box>(()) + }); - let output_stream = stream! { - while let Some(batch) = batch_rx.recv().await { - yield Ok(batch); - } + let output_stream = stream! { + while let Some(batch) = batch_rx.recv().await { + yield Ok(batch); + } - match join_handle.await { - Ok(Err(task_error)) => { - yield Err(DataFusionError::Execution(format!( - "Failed to execute DuckDB query: {task_error}" - ))) - }, - Err(join_error) => { - yield Err(DataFusionError::Execution(format!( - "Failed to execute DuckDB query: {join_error}" - ))) - }, - _ => {} - } + match join_handle.await { + Ok(Err(task_error)) => { + yield Err(DataFusionError::Execution(format!( + "Failed to execute DuckDB query: {task_error}" + ))) + }, + Err(join_error) => { + yield Err(DataFusionError::Execution(format!( + "Failed to execute DuckDB query: {join_error}" + ))) + }, + _ => {} + } + }; + + Ok(Box::pin(RecordBatchStreamAdapter::new( + schema, + output_stream, + ))) }; - Ok(Box::pin(RecordBatchStreamAdapter::new( - schema, - output_stream, - ))) + run_sync_with_tokio(create_stream) } fn execute(&self, sql: &str, params: &[DuckDBParameter]) -> Result { diff --git a/core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs b/core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs index c18722e5..92af1563 100644 --- a/core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs +++ b/core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs @@ -2,8 +2,8 @@ use std::{any::Any, sync::Arc}; use crate::sql::arrow_sql_gen::mysql::map_column_to_data_type; use crate::sql::arrow_sql_gen::{self, mysql::rows_to_arrow}; -use arrow::datatypes::{Field, Schema, SchemaRef}; use async_stream::stream; +use datafusion::arrow::datatypes::{Field, Schema, SchemaRef}; use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -93,6 +93,47 @@ impl<'a> AsyncDbConnection for MySQLConnection { } } + async fn tables(&self, schema: &str) -> Result, super::Error> { + let mut conn = self.conn.lock().await; + let conn = &mut *conn; + + let query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = ?"; + let tables: Vec = conn + .exec(query, (schema,)) + .await + .boxed() + .context(super::UnableToGetTablesSnafu)?; + + let table_names = tables + .iter() + 
.filter_map(|row| row.get::("TABLE_NAME")) + .collect(); + + Ok(table_names) + } + + async fn schemas(&self) -> Result, super::Error> { + let mut conn = self.conn.lock().await; + let conn = &mut *conn; + + let query = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA \ + WHERE SCHEMA_NAME NOT IN ('information_schema', 'mysql', \ + 'performance_schema', 'sys')"; + + let schemas: Vec = conn + .exec(query, ()) + .await + .boxed() + .context(super::UnableToGetSchemasSnafu)?; + + let schema_names = schemas + .iter() + .filter_map(|row| row.get::("SCHEMA_NAME")) + .collect(); + + Ok(schema_names) + } + async fn get_schema( &self, table_reference: &TableReference, diff --git a/core/src/sql/db_connection_pool/dbconnection/odbcconn.rs b/core/src/sql/db_connection_pool/dbconnection/odbcconn.rs new file mode 100644 index 00000000..f3605a5f --- /dev/null +++ b/core/src/sql/db_connection_pool/dbconnection/odbcconn.rs @@ -0,0 +1,320 @@ +/* +Copyright 2024 The Spice.ai OSS Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +use crate::sql::db_connection_pool::{ + dbconnection::{self, AsyncDbConnection, DbConnection, GenericError}, + runtime::run_async_with_tokio, + DbConnectionPool, +}; +use arrow_odbc::arrow_schema_from; +use arrow_odbc::odbc_api::{ + self, handles::SqlResult, handles::Statement, handles::StatementImpl, + parameter::InputParameter, Connection, Cursor, CursorImpl, +}; +use arrow_odbc::OdbcReader; +use arrow_odbc::OdbcReaderBuilder; +use async_stream::stream; +use async_trait::async_trait; +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::DataFusionError; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::sql::TableReference; +use dyn_clone::DynClone; +use futures::lock::Mutex; +use secrecy::{ExposeSecret, SecretBox, SecretString}; +use snafu::prelude::*; +use snafu::Snafu; +use tokio::runtime::Handle; + +use tokio::sync::mpsc::Sender; + +type Result = std::result::Result; + +pub trait ODBCSyncParameter: InputParameter + Sync + Send + DynClone { + fn as_input_parameter(&self) -> &dyn InputParameter; +} + +impl ODBCSyncParameter for T { + fn as_input_parameter(&self) -> &dyn InputParameter { + self + } +} + +dyn_clone::clone_trait_object!(ODBCSyncParameter); + +pub type ODBCParameter = Box; +pub type ODBCDbConnection<'a> = dyn DbConnection, ODBCParameter>; +pub type ODBCDbConnectionPool<'a> = + dyn DbConnectionPool, ODBCParameter> + Sync + Send; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Failed to convert query result to Arrow: {source}"))] + ArrowError { + source: datafusion::arrow::error::ArrowError, + }, + #[snafu(display("arrow_odbc error: {source}"))] + ArrowODBCError { source: arrow_odbc::Error }, + #[snafu(display("odbc_api Error: {source}"))] + ODBCAPIError { source: odbc_api::Error }, + 
#[snafu(display("odbc_api Error: {message}"))] + ODBCAPIErrorNoSource { message: String }, + #[snafu(display("Failed to convert query result to Arrow: {source}"))] + TryFromError { source: std::num::TryFromIntError }, + #[snafu(display("Unable to bind integer parameter: {source}"))] + UnableToBindIntParameter { source: std::num::TryFromIntError }, + #[snafu(display("Internal communication channel error: {message}"))] + ChannelError { message: String }, +} + +pub struct ODBCConnection<'a> { + pub conn: Arc>>, + pub params: Arc>, +} + +impl<'a> DbConnection, ODBCParameter> for ODBCConnection<'a> +where + 'a: 'static, +{ + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn as_async(&self) -> Option<&dyn AsyncDbConnection, ODBCParameter>> { + Some(self) + } +} + +fn blocking_channel_send(channel: &Sender, item: T) -> Result<()> { + match channel.blocking_send(item) { + Ok(()) => Ok(()), + Err(e) => Err(Error::ChannelError { + message: format!("{e}"), + } + .into()), + } +} + +#[async_trait] +impl<'a> AsyncDbConnection, ODBCParameter> for ODBCConnection<'a> +where + 'a: 'static, +{ + fn new(conn: Connection<'a>) -> Self { + ODBCConnection { + conn: Arc::new(conn.into()), + params: Arc::new(HashMap::new()), + } + } + + async fn tables(&self, _schema: &str) -> Result, super::Error> { + unimplemented!() + } + + async fn schemas(&self) -> Result, super::Error> { + unimplemented!() + } + + async fn get_schema( + &self, + table_reference: &TableReference, + ) -> Result { + let cxn = self.conn.lock().await; + + let mut prepared = cxn + .prepare(&format!( + "SELECT * FROM {} LIMIT 1", + table_reference.to_quoted_string() + )) + .boxed() + .map_err(|e| dbconnection::Error::UnableToGetSchema { source: e })?; + + let schema = Arc::new( + arrow_schema_from(&mut prepared, None, false) + .boxed() + .map_err(|e| dbconnection::Error::UnableToGetSchema { source: e })?, + ); + + Ok(schema) + } + + async fn query_arrow( + &self, + sql: &str, + params: &[ODBCParameter], + _projected_schema: Option, + ) -> Result { + // prepare some tokio channels to communicate query results back from the thread + let (batch_tx, mut batch_rx) = tokio::sync::mpsc::channel::(4); + let (schema_tx, mut schema_rx) = tokio::sync::mpsc::channel::>(1); + + // clone internals and parameters to let the thread own them + let conn = Arc::clone(&self.conn); // clones the mutex not the connection, so we can .lock a connection inside the thread + let sql = sql.to_string(); + + // ODBCParameter is a dynamic trait object, so we can't use std::clone::Clone because it's not object safe + // DynClone provides an object-safe clone trait, which we use to clone the boxed parameters + let params = params.iter().map(dyn_clone::clone).collect::>(); + let secrets = Arc::clone(&self.params); + + let create_stream = async || -> Result { + let join_handle = tokio::task::spawn_blocking(move || { + let handle = Handle::current(); + let cxn = handle.block_on(async { conn.lock().await }); + + let mut prepared = cxn.prepare(&sql)?; + let schema = Arc::new(arrow_schema_from(&mut prepared, None, false)?); + blocking_channel_send(&schema_tx, Arc::clone(&schema))?; + + let mut statement = prepared.into_handle(); + + bind_parameters(&mut statement, ¶ms)?; + + // StatementImpl<'_>::execute is unsafe, CursorImpl<_>::new is unsafe + let cursor = unsafe { + if let SqlResult::Error { function } = statement.execute() { + return Err(Error::ODBCAPIErrorNoSource { + message: function.to_string(), + } + .into()); + } + + 
Ok::<_, GenericError>(CursorImpl::new(statement.as_stmt_ref())) + }?; + + let reader = build_odbc_reader(cursor, &schema, &secrets)?; + for batch in reader { + blocking_channel_send(&batch_tx, batch.context(ArrowSnafu)?)?; + } + + Ok::<_, GenericError>(()) + }); + + // we need to wait for the schema first before we can build our RecordBatchStreamAdapter + let Some(schema) = schema_rx.recv().await else { + // if the channel drops, the task errored + if !join_handle.is_finished() { + unreachable!("Schema channel should not have dropped before the task finished"); + } + + let result = join_handle.await?; + let Err(err) = result else { + unreachable!("Task should have errored"); + }; + + return Err(err); + }; + + let output_stream = stream! { + while let Some(batch) = batch_rx.recv().await { + yield Ok(batch); + } + + if let Err(e) = join_handle.await { + yield Err(DataFusionError::Execution(format!( + "Failed to execute ODBC query: {e}" + ))) + } + }; + + let result: SendableRecordBatchStream = + Box::pin(RecordBatchStreamAdapter::new(schema, output_stream)); + Ok(result) + }; + run_async_with_tokio(create_stream).await + } + + async fn execute(&self, query: &str, params: &[ODBCParameter]) -> Result { + let cxn = self.conn.lock().await; + let prepared = cxn.prepare(query)?; + let mut statement = prepared.into_handle(); + + bind_parameters(&mut statement, params)?; + + let row_count = unsafe { + statement.execute().unwrap(); + statement.row_count() + }; + + Ok(row_count.unwrap().try_into().context(TryFromSnafu)?) + } +} + +fn build_odbc_reader( + cursor: C, + schema: &Arc, + params: &HashMap, +) -> Result, Error> { + let mut builder = OdbcReaderBuilder::new(); + builder.with_schema(Arc::clone(schema)); + + let bind_as_usize = |k: &str, default: Option, f: &mut dyn FnMut(usize)| { + params + .get(k) + .map(SecretBox::expose_secret) + .and_then(|s| s.parse::().ok()) + .or(default) + .into_iter() + .for_each(f); + }; + + bind_as_usize("max_binary_size", None, &mut |s| { + builder.with_max_binary_size(s); + }); + bind_as_usize("max_text_size", None, &mut |s| { + builder.with_max_text_size(s); + }); + bind_as_usize("max_bytes_per_batch", Some(512_000_000), &mut |s| { + builder.with_max_bytes_per_batch(s); + }); + + // larger default max_num_rows_per_batch reduces IO overhead but increases memory usage + // lower numbers reduce memory usage but increase IO overhead + bind_as_usize("max_num_rows_per_batch", Some(4000), &mut |s| { + builder.with_max_num_rows_per_batch(s); + }); + + builder.build(cursor).context(ArrowODBCSnafu) +} + +/// Binds parameter to an ODBC statement. +/// +/// `StatementImpl<'_>::bind_input_parameter` is unsafe. 
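// Illustrative sketch (assumed usage, not part of this patch): the batch limits
// read by `build_odbc_reader` above come from the pool's secret parameter map,
// so the IO/memory trade-off can be tuned when the ODBC pool is constructed.
// The DSN string below is hypothetical.
//
//     use std::collections::HashMap;
//     use secrecy::SecretString;
//
//     let mut params: HashMap<String, SecretString> = HashMap::new();
//     params.insert(
//         "connection_string".into(),
//         SecretString::from("Driver={SQLite3};Database=test.db;".to_string()),
//     );
//     // fewer, larger batches: less IO overhead, more memory per batch
//     params.insert(
//         "max_num_rows_per_batch".into(),
//         SecretString::from("16000".to_string()),
//     );
//     let pool = ODBCPool::new(params)?;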
+fn bind_parameters(statement: &mut StatementImpl, params: &[ODBCParameter]) -> Result<(), Error> { + for (i, param) in params.iter().enumerate() { + unsafe { + statement + .bind_input_parameter( + (i + 1).try_into().context(UnableToBindIntParameterSnafu)?, + param.as_input_parameter(), + ) + .unwrap(); + } + } + + Ok(()) +} diff --git a/core/src/sql/db_connection_pool/dbconnection/postgresconn.rs b/core/src/sql/db_connection_pool/dbconnection/postgresconn.rs index 0e151fd2..675c86b7 100644 --- a/core/src/sql/db_connection_pool/dbconnection/postgresconn.rs +++ b/core/src/sql/db_connection_pool/dbconnection/postgresconn.rs @@ -1,14 +1,12 @@ use std::any::Any; use std::error::Error; use std::sync::Arc; -use tokio_postgres::Row; use crate::sql::arrow_sql_gen::postgres::rows_to_arrow; use crate::sql::arrow_sql_gen::postgres::schema::pg_data_type_to_arrow_type; use crate::sql::arrow_sql_gen::postgres::schema::ParseContext; use crate::util::handle_unsupported_type_error; use crate::util::schema::SchemaValidator; -use crate::UnsupportedTypeAction; use arrow::datatypes::Field; use arrow::datatypes::Schema; use arrow::datatypes::SchemaRef; @@ -16,7 +14,6 @@ use arrow_schema::DataType; use async_stream::stream; use bb8_postgres::tokio_postgres::types::ToSql; use bb8_postgres::PostgresConnectionManager; -use bitflags::bitflags; use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -26,17 +23,12 @@ use futures::StreamExt; use postgres_native_tls::MakeTlsConnector; use snafu::prelude::*; +use crate::UnsupportedTypeAction; + use super::AsyncDbConnection; use super::DbConnection; use super::Result; -bitflags! { - #[derive(PartialEq)] - pub struct PostgresConnectionQuirks: u8 { - const CheckVariant = 0b0000_0001; - } -} - const SCHEMA_QUERY: &str = r" WITH custom_type_details AS ( SELECT @@ -111,37 +103,18 @@ WHERE ns.nspname = $1 ORDER BY a.attnum; "; -const REDSHIFT_SCHEMA_QUERY: &str = r#" -SELECT - a.attname AS column_name, - CASE - WHEN t.typelem != 0 AND et.typname IS NOT NULL THEN 'array' - ELSE pg_catalog.format_type(a.atttypid, a.atttypmod) - END AS data_type, - CASE WHEN a.attnotnull THEN 'NO' ELSE 'YES' END AS is_nullable, - CASE - WHEN t.typelem != 0 AND et.typname IS NOT NULL THEN - -- JSON_PARSE returns a super type tokio-postgres does not understand, so cast to text - JSON_PARSE('{"type": "array", "element_type": "' || et.typname || '"}')::text - ELSE NULL - END AS type_details -FROM pg_class cls -JOIN pg_namespace ns ON cls.relnamespace = ns.oid -JOIN pg_attribute a ON a.attrelid = cls.oid -LEFT JOIN pg_type t ON t.oid = a.atttypid -LEFT JOIN pg_type et ON t.typelem = et.oid -WHERE ns.nspname = $1 - AND cls.relname = $2 - AND cls.relkind IN ('r','v','m') - AND a.attnum > 0 - AND NOT a.attisdropped -ORDER BY a.attnum; -"#; +const SCHEMAS_QUERY: &str = " +SELECT nspname AS schema_name +FROM pg_namespace +WHERE nspname NOT IN ('pg_catalog', 'information_schema') + AND nspname !~ '^pg_toast'; +"; -pub enum PostgresVariant { - Default, - Redshift, -} +const TABLES_QUERY: &str = " +SELECT tablename +FROM pg_tables +WHERE schemaname = $1; +"; #[derive(Debug, Snafu)] pub enum PostgresError { @@ -161,7 +134,6 @@ pub enum PostgresError { pub struct PostgresConnection { pub conn: bb8::PooledConnection<'static, PostgresConnectionManager>, unsupported_type_action: UnsupportedTypeAction, - quirks: PostgresConnectionQuirks, } impl SchemaValidator for PostgresConnection { @@ -218,15 +190,62 @@ 
impl<'a> PostgresConnection { conn, unsupported_type_action: UnsupportedTypeAction::default(), - quirks: PostgresConnectionQuirks::CheckVariant, } } + async fn tables(&self, schema: &str) -> Result, super::Error> { + let rows = self + .conn + .query(TABLES_QUERY, &[&schema]) + .await + .map_err(|e| super::Error::UnableToGetTables { + source: Box::new(e), + })?; + + Ok(rows.iter().map(|r| r.get::(0)).collect()) + } + + async fn schemas(&self) -> Result, super::Error> { + let rows = self.conn.query(SCHEMAS_QUERY, &[]).await.map_err(|e| { + super::Error::UnableToGetSchemas { + source: Box::new(e), + } + })?; + + Ok(rows.iter().map(|r| r.get::(0)).collect()) + } + async fn get_schema( &self, table_reference: &TableReference, ) -> Result { - let (variant, rows) = self.query_variant_and_schema(table_reference).await?; + let table_name = table_reference.table(); + let schema_name = table_reference.schema().unwrap_or("public"); + + let rows = match self + .conn + .query(SCHEMA_QUERY, &[&schema_name, &table_name]) + .await + { + Ok(rows) => rows, + Err(e) => { + if let Some(error_source) = e.source() { + if let Some(pg_error) = + error_source.downcast_ref::() + { + if pg_error.code() == &tokio_postgres::error::SqlState::UNDEFINED_TABLE { + return Err(super::Error::UndefinedTable { + source: Box::new(pg_error.clone()), + table_name: table_reference.to_string(), + }); + } + } + } + return Err(super::Error::UnableToGetSchema { + source: Box::new(e), + }); + } + }; let mut fields = Vec::new(); for row in rows { @@ -234,16 +253,7 @@ impl<'a> let pg_type = row.get::(1); let nullable_str = row.get::(2); let nullable = nullable_str == "YES"; - - let type_details = match variant { - PostgresVariant::Default => row.get::>(3), - // Redshift has no json* functions, so we make and parse the same value struct - // from a text column instead. - PostgresVariant::Redshift => row - .get::>(3) - .and_then(|v| serde_json::from_str::(v).ok()), - }; - + let type_details = row.get::>(3); let mut context = ParseContext::new().with_unsupported_type_action(self.unsupported_type_action); @@ -335,77 +345,4 @@ impl PostgresConnection { self.unsupported_type_action = action; self } - - #[must_use] - pub fn with_quirks(mut self, quirks: PostgresConnectionQuirks) -> Self { - self.quirks = quirks; - self - } - - pub async fn get_variant(&self) -> Result { - let row = self - .conn - .query_one("SELECT version()", &[]) - .await - .map_err(|e| super::Error::UnableToGetSchema { - source: Box::new(e), - })?; - - let version: String = row - .try_get(0) - .map_err(|e| super::Error::UnableToGetSchema { - source: Box::new(e), - })?; - - let variant = if version.contains("Redshift") { - PostgresVariant::Redshift - } else { - PostgresVariant::Default - }; - - Ok(variant) - } - - async fn query_variant_and_schema( - &self, - table_reference: &TableReference, - ) -> Result<(PostgresVariant, Vec), super::Error> { - let table_name = table_reference.table(); - let schema_name = table_reference.schema().unwrap_or("public"); - - let variant = if self.quirks.contains(PostgresConnectionQuirks::CheckVariant) { - self.get_variant().await? 
- } else { - PostgresVariant::Default - }; - - let query = match variant { - PostgresVariant::Default => SCHEMA_QUERY, - PostgresVariant::Redshift => REDSHIFT_SCHEMA_QUERY, - }; - - let rows = self - .conn - .query(query, &[&schema_name, &table_name]) - .await - .map_err(|e| { - if let Some(error_source) = e.source() { - if let Some(pg_error) = - error_source.downcast_ref::() - { - if pg_error.code() == &tokio_postgres::error::SqlState::UNDEFINED_TABLE { - return super::Error::UndefinedTable { - source: Box::new(pg_error.clone()), - table_name: table_reference.to_string(), - }; - } - } - } - super::Error::UnableToGetSchema { - source: Box::new(e), - } - }); - - Ok((variant, rows?)) - } } diff --git a/core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs b/core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs index 90bf67b9..2182c3b3 100644 --- a/core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs +++ b/core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs @@ -89,6 +89,28 @@ impl AsyncDbConnection for SqliteConnec SqliteConnection { conn } } + async fn tables(&self, _schema: &str) -> Result, super::Error> { + let tables = self + .conn + .call(move |conn| { + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'", + )?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + let tables: Result, rusqlite::Error> = rows.collect(); + Ok(tables?) + }) + .await + .boxed() + .context(super::UnableToGetTablesSnafu)?; + + Ok(tables) + } + + async fn schemas(&self) -> Result, super::Error> { + Ok(vec!["main".to_string()]) + } + async fn get_schema( &self, table_reference: &TableReference, diff --git a/core/src/sql/db_connection_pool/duckdbpool.rs b/core/src/sql/db_connection_pool/duckdbpool.rs index 1b23f34a..72387967 100644 --- a/core/src/sql/db_connection_pool/duckdbpool.rs +++ b/core/src/sql/db_connection_pool/duckdbpool.rs @@ -400,11 +400,11 @@ mod test { use crate::sql::db_connection_pool::DbConnectionPool; fn random_db_name() -> String { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut name = String::new(); for _ in 0..10 { - name.push(rng.gen_range(b'a'..=b'z') as char); + name.push(rng.random_range(b'a'..=b'z') as char); } format!("./{name}.duckdb") diff --git a/core/src/sql/db_connection_pool/mod.rs b/core/src/sql/db_connection_pool/mod.rs index ad534f82..9fa98bfc 100644 --- a/core/src/sql/db_connection_pool/mod.rs +++ b/core/src/sql/db_connection_pool/mod.rs @@ -3,12 +3,18 @@ use dbconnection::DbConnection; use std::sync::Arc; pub mod dbconnection; + +#[cfg(feature = "clickhouse")] +pub mod clickhousepool; #[cfg(feature = "duckdb")] pub mod duckdbpool; #[cfg(feature = "mysql")] pub mod mysqlpool; +#[cfg(feature = "odbc")] +pub mod odbcpool; #[cfg(feature = "postgres")] pub mod postgrespool; +pub mod runtime; #[cfg(feature = "sqlite")] pub mod sqlitepool; diff --git a/core/src/sql/db_connection_pool/mysqlpool.rs b/core/src/sql/db_connection_pool/mysqlpool.rs index e7d70b53..e81f45cb 100644 --- a/core/src/sql/db_connection_pool/mysqlpool.rs +++ b/core/src/sql/db_connection_pool/mysqlpool.rs @@ -57,6 +57,13 @@ pub struct MySQLConnectionPool { join_push_down: JoinPushDown, } +const SETUP_QUERIES: [&str; 4] = [ + "SET time_zone = '+00:00'", + "SET character_set_results = 'utf8mb4'", + "SET character_set_client = 'utf8mb4'", + "SET character_set_connection = 'utf8mb4'", +]; + /// Returns the setup queries for the MySQL connection, optionally overriding default time zone (UTC). 
fn get_setup_queries(time_zone: Option<&str>) -> Vec { let tz = time_zone.unwrap_or("+00:00"); diff --git a/core/src/sql/db_connection_pool/odbcpool.rs b/core/src/sql/db_connection_pool/odbcpool.rs new file mode 100644 index 00000000..0db4968b --- /dev/null +++ b/core/src/sql/db_connection_pool/odbcpool.rs @@ -0,0 +1,128 @@ +/* +Copyright 2024 The Spice.ai OSS Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use crate::sql::db_connection_pool::dbconnection::odbcconn::ODBCConnection; +use crate::sql::db_connection_pool::dbconnection::odbcconn::{ODBCDbConnection, ODBCParameter}; +use crate::sql::db_connection_pool::{DbConnectionPool, JoinPushDown}; +use arrow_odbc::odbc_api::{ + sys::AttrConnectionPooling, Connection, ConnectionOptions, Environment, +}; +use async_trait::async_trait; +use secrecy::{ExposeSecret, SecretBox, SecretString}; +use sha2::{Digest, Sha256}; +use snafu::prelude::*; +use std::{ + collections::HashMap, + sync::{Arc, LazyLock}, +}; + +static ENV: LazyLock = LazyLock::new(|| unsafe { + // Enable connection pooling. Let driver decide wether the attributes of two connection + // are similar enough to change the attributes of a pooled one, to fit the requested + // connection, or if it is cheaper to create a new Connection from scratch. + // See + if let Err(e) = Environment::set_connection_pooling(AttrConnectionPooling::DriverAware) { + tracing::error!("Failed to set ODBC connection pooling: {e}"); + }; + match Environment::new() { + Ok(env) => env, + Err(e) => { + panic!("Failed to create ODBC environment: {e}"); + } + } +}); + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Missing ODBC connection string parameter: odbc_connection_string"))] + MissingConnectionString {}, + + #[snafu(display("Invalid parameter: {parameter_name}"))] + InvalidParameterError { parameter_name: String }, +} + +pub struct ODBCPool { + pool: &'static Environment, + params: Arc>, + connection_string: String, + connection_id: String, +} + +fn hash_string(val: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(val); + hasher.finalize().iter().fold(String::new(), |mut hash, b| { + hash.push_str(&format!("{b:02x}")); + hash + }) +} + +impl ODBCPool { + // Creates a new instance of `ODBCPool`. + /// + /// # Errors + /// + /// Returns an error if there is a problem creating the connection pool. + pub fn new(params: HashMap) -> Result { + let connection_string = params + .get("connection_string") + .map(SecretBox::expose_secret) + .map(ToString::to_string) + .context(MissingConnectionStringSnafu)?; + + // hash the connection string to get a comparable connection ID + // we do this to prevent exposing secrets in the EXPLAIN ... 
plan when using federated JoinPushDown + let connection_id = hash_string(&connection_string); + + Ok(Self { + params: params.into(), + connection_string, + connection_id, + pool: &ENV, + }) + } + + #[must_use] + pub fn odbc_environment(&self) -> &'static Environment { + self.pool + } +} + +#[async_trait] +impl<'a> DbConnectionPool, ODBCParameter> for ODBCPool +where + 'a: 'static, +{ + async fn connect( + &self, + ) -> Result>, Box> { + let cxn = self.pool.connect_with_connection_string( + &self.connection_string, + ConnectionOptions::default(), + )?; + + let odbc_cxn = ODBCConnection { + conn: Arc::new(cxn.into()), + params: Arc::clone(&self.params), + }; + + Ok(Box::new(odbc_cxn)) + } + + fn join_push_down(&self) -> JoinPushDown { + JoinPushDown::AllowedFor(self.connection_id.clone()) + } +} diff --git a/core/src/sql/db_connection_pool/postgrespool.rs b/core/src/sql/db_connection_pool/postgrespool.rs index 262bc333..570caeb2 100644 --- a/core/src/sql/db_connection_pool/postgrespool.rs +++ b/core/src/sql/db_connection_pool/postgrespool.rs @@ -16,7 +16,7 @@ use secrecy::{ExposeSecret, SecretBox, SecretString}; use snafu::{prelude::*, ResultExt}; use tokio_postgres; -use super::DbConnectionPool; +use super::{runtime::run_async_with_tokio, DbConnectionPool}; use crate::sql::db_connection_pool::{ dbconnection::{postgresconn::PostgresConnection, AsyncDbConnection, DbConnection}, JoinPushDown, @@ -79,6 +79,7 @@ pub enum Error { pub type Result = std::result::Result; +#[derive(Debug)] pub struct PostgresConnectionPool { pool: Arc>>, join_push_down: JoinPushDown, @@ -408,7 +409,8 @@ impl >, > { let pool = Arc::clone(&self.pool); - let conn = pool.get_owned().await.context(ConnectionPoolRunSnafu)?; + let get_conn = async || pool.get_owned().await.context(ConnectionPoolRunSnafu); + let conn = run_async_with_tokio(get_conn).await?; Ok(Box::new( PostgresConnection::new(conn) .with_unsupported_type_action(self.unsupported_type_action), diff --git a/core/src/sql/db_connection_pool/runtime.rs b/core/src/sql/db_connection_pool/runtime.rs new file mode 100644 index 00000000..a249e4bf --- /dev/null +++ b/core/src/sql/db_connection_pool/runtime.rs @@ -0,0 +1,40 @@ +// If calling directly from Rust, there is already tokio runtime so no +// additional work is needed. If calling from Python FFI, there's no existing +// tokio runtime, so we need to start a new one. 
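// A minimal sketch of the intended calling pattern (assumption, not taken from
// this patch): wrap the async operation in a closure and let the helpers below
// pick the runtime. `load` is a hypothetical async function.
//
//     async fn load() -> Result<u64, std::io::Error> { Ok(1) }
//
//     // inside an existing Tokio runtime (plain Rust callers)
//     let n = run_async_with_tokio(load).await?;
//
//     // from a synchronous, non-Tokio context (e.g. Python FFI entry points)
//     let result = execute_in_tokio(load);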
+use std::{future::Future, sync::OnceLock};
+
+use tokio::runtime::Handle;
+
+pub(crate) struct TokioRuntime(tokio::runtime::Runtime);
+
+#[inline]
+pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
+    static RUNTIME: OnceLock<TokioRuntime> = OnceLock::new();
+    RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap()))
+}
+
+pub fn execute_in_tokio<T, F, Fut>(f: F) -> T
+where
+    F: FnOnce() -> Fut,
+    Fut: Future<Output = T>,
+{
+    get_tokio_runtime().0.block_on(f())
+}
+
+pub async fn run_async_with_tokio<T, E, F, Fut>(f: F) -> Result<T, E>
+where
+    F: FnOnce() -> Fut,
+    Fut: Future<Output = Result<T, E>>,
+{
+    match Handle::try_current() {
+        Ok(_) => f().await,
+        Err(_) => execute_in_tokio(f),
+    }
+}
+
+pub fn run_sync_with_tokio<T, E>(f: impl FnOnce() -> Result<T, E>) -> Result<T, E> {
+    match Handle::try_current() {
+        Ok(_) => f(),
+        Err(_) => execute_in_tokio(|| async { f() }),
+    }
+}
diff --git a/core/src/sql/db_connection_pool/sqlitepool.rs b/core/src/sql/db_connection_pool/sqlitepool.rs
index 965dbe55..4d313255 100644
--- a/core/src/sql/db_connection_pool/sqlitepool.rs
+++ b/core/src/sql/db_connection_pool/sqlitepool.rs
@@ -269,11 +269,11 @@ mod tests {
     use std::time::Duration;
 
     fn random_db_name() -> String {
-        let mut rng = rand::thread_rng();
+        let mut rng = rand::rng();
         let mut name = String::new();
 
         for _ in 0..10 {
-            name.push(rng.gen_range(b'a'..=b'z') as char);
+            name.push(rng.random_range(b'a'..=b'z') as char);
        }

        format!("./{name}.sqlite")
@@ -291,7 +291,7 @@ mod tests {
        assert!(pool.mode == Mode::File);
        assert_eq!(pool.path, db_name.clone().into());

-        drop(pool);
+        pool.conn.close().await.unwrap();

        // cleanup
        std::fs::remove_file(&db_name).unwrap();
@@ -324,7 +324,7 @@ mod tests {
        assert!(pool.mode == Mode::File);
        assert_eq!(pool.path, db_names[0].clone().into());

-        drop(pool);
+        pool.conn.close().await.unwrap();

        // cleanup
        for db in &db_names {
@@ -345,7 +345,7 @@ mod tests {
        assert!(pool.mode == Mode::File);
        assert_eq!(pool.path, db_name.clone().into());

-        drop(pool);
+        pool.conn.close().await.unwrap();

        // cleanup
        std::fs::remove_file(&db_name).unwrap();
@@ -365,7 +365,7 @@ mod tests {
        assert!(pool.mode == Mode::Memory);
        assert_eq!(pool.path, "./test.sqlite".into());

-        drop(pool);
+        pool.conn.close().await.unwrap();

        // in memory mode, attachments are not created and nothing happens
        assert!(std::fs::metadata("./test.sqlite").is_err());
diff --git a/core/src/sql/sql_provider_datafusion/expr.rs b/core/src/sql/sql_provider_datafusion/expr.rs
index 65bde8f5..e7cdc918 100644
--- a/core/src/sql/sql_provider_datafusion/expr.rs
+++ b/core/src/sql/sql_provider_datafusion/expr.rs
@@ -489,3 +489,17 @@ mod tests {
         Ok(())
     }
 }
+
+use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion};
+
+pub(super) fn expr_contains_subquery(expr: &Expr) -> datafusion::error::Result<bool> {
+    let mut contains_subquery = false;
+    expr.apply(|expr| match expr {
+        Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists(_) => {
+            contains_subquery = true;
+            Ok(TreeNodeRecursion::Stop)
+        }
+        _ => Ok(TreeNodeRecursion::Continue),
+    })?;
+    Ok(contains_subquery)
+}
diff --git a/core/src/sql/sql_provider_datafusion/federation.rs b/core/src/sql/sql_provider_datafusion/federation.rs
index 69b64ad9..73c6b449 100644
--- a/core/src/sql/sql_provider_datafusion/federation.rs
+++ b/core/src/sql/sql_provider_datafusion/federation.rs
@@ -22,16 +22,27 @@ use datafusion::{
 };
 
 impl SqlTable {
+    // Return the current memory location of the object as a unique identifier
+    fn unique_id(&self) -> usize {
+        std::ptr::from_ref(self) as usize
+    }
+
+    fn arc_dialect(&self) -> Arc<dyn Dialect> {
+        match
&self.dialect { + Some(dialect) => Arc::clone(dialect), + None => Arc::new(DefaultDialect {}), + } + } + fn create_federated_table_source( self: Arc, ) -> DataFusionResult> { - let table_name = self.table_reference.clone(); + let table_reference = self.table_reference.clone(); let schema = Arc::clone(&self.schema); let fed_provider = Arc::new(SQLFederationProvider::new(self)); - Ok(Arc::new(SQLTableSource::new_with_schema( fed_provider, - RemoteTableRef::from(table_name), + RemoteTableRef::from(table_reference), schema, ))) } @@ -50,7 +61,7 @@ impl SqlTable { #[async_trait] impl SQLExecutor for SqlTable { fn name(&self) -> &str { - self.name + &self.name } fn compute_context(&self) -> Option { @@ -63,10 +74,7 @@ impl SQLExecutor for SqlTable { } fn dialect(&self) -> Arc { - let Some(ref dialect) = self.dialect else { - return Arc::new(DefaultDialect {}); - }; - Arc::clone(dialect) as Arc<_> + self.arc_dialect() } fn execute( diff --git a/core/src/sql/sql_provider_datafusion/mod.rs b/core/src/sql/sql_provider_datafusion/mod.rs index 2ebf0d8c..ea4d784c 100644 --- a/core/src/sql/sql_provider_datafusion/mod.rs +++ b/core/src/sql/sql_provider_datafusion/mod.rs @@ -12,35 +12,36 @@ use crate::sql::db_connection_pool::{ use async_trait::async_trait; use datafusion::{ catalog::Session, - common::Constraints, - sql::unparser::{ - dialect::{DefaultDialect, Dialect}, - Unparser, - }, + physical_plan::execution_plan::{Boundedness, EmissionType}, + sql::unparser::dialect::{DefaultDialect, Dialect}, }; -use expr::Engine; use futures::TryStreamExt; use snafu::prelude::*; -use std::fmt::Display; use std::{any::Any, fmt, sync::Arc}; +use std::{ + fmt::{Display, Formatter}, + sync::LazyLock, +}; use datafusion::{ - arrow::datatypes::SchemaRef, + arrow::datatypes::{DataType, Field, Schema, SchemaRef}, datasource::TableProvider, error::{DataFusionError, Result as DataFusionResult}, execution::TaskContext, - logical_expr::{Expr, TableProviderFilterPushDown, TableType}, + logical_expr::{ + logical_plan::builder::LogicalTableSource, Expr, LogicalPlan, LogicalPlanBuilder, + TableProviderFilterPushDown, TableType, + }, physical_expr::EquivalenceProperties, physical_plan::{ - execution_plan::{Boundedness, EmissionType}, - stream::RecordBatchStreamAdapter, - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, - SendableRecordBatchStream, + stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, + Partitioning, PlanProperties, SendableRecordBatchStream, }, - sql::TableReference, + sql::{unparser::Unparser, TableReference}, }; -pub mod expr; +mod expr; +#[cfg(feature = "federation")] pub mod federation; #[derive(Debug, Snafu)] @@ -54,41 +55,35 @@ pub enum Error { }, #[snafu(display("Unable to generate SQL: {source}"))] - UnableToGenerateSQL { source: expr::Error }, - - #[snafu(display("Unable to generate SQL: {source}"))] - UnableToGenerateSQLDataFusion { source: DataFusionError }, + UnableToGenerateSQL { source: DataFusionError }, } pub type Result = std::result::Result; +#[derive(Clone)] pub struct SqlTable { - name: &'static str, + name: String, pool: Arc + Send + Sync>, schema: SchemaRef, pub table_reference: TableReference, - engine: Option, dialect: Option>, - constraints: Option, } -impl std::fmt::Debug for SqlTable { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl fmt::Debug for SqlTable { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("SqlTable") .field("name", &self.name) - .field("table_reference", 
&self.table_reference) .field("schema", &self.schema) - .field("engine", &self.engine) + .field("table_reference", &self.table_reference) .finish() } } impl SqlTable { pub async fn new( - name: &'static str, + name: &str, pool: &Arc + Send + Sync>, table_reference: impl Into, - engine: Option, ) -> Result { let table_reference = table_reference.into(); let conn = pool @@ -100,89 +95,92 @@ impl SqlTable { .await .context(UnableToGetSchemaSnafu)?; - Ok(Self { - name, - pool: Arc::clone(pool), - schema, - table_reference, - engine, - dialect: None, - constraints: None, - }) + Ok(Self::new_with_schema(name, pool, schema, table_reference)) } pub fn new_with_schema( - name: &'static str, + name: &str, pool: &Arc + Send + Sync>, schema: impl Into, table_reference: impl Into, - engine: Option, ) -> Self { Self { - name, + name: name.to_owned(), pool: Arc::clone(pool), schema: schema.into(), table_reference: table_reference.into(), - engine, dialect: None, - constraints: None, } } - pub fn with_constraints_opt(mut self, constraints: Option) -> Self { - self.constraints = constraints; - self - } + pub fn scan_to_sql( + &self, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> DataFusionResult { + let logical_plan = self.create_logical_plan(projection, filters, limit)?; + let sql = Unparser::new(self.dialect()) + .plan_to_sql(&logical_plan)? + .to_string(); - pub fn with_constraints(mut self, constraints: Constraints) -> Self { - self.constraints = Some(constraints); - self + Ok(sql) } - #[must_use] - pub fn with_dialect(self, dialect: Arc) -> Self { - Self { - dialect: Some(dialect), - ..self - } + fn create_logical_plan( + &self, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> DataFusionResult { + let table_source = LogicalTableSource::new(self.schema()); + LogicalPlanBuilder::scan_with_filters( + self.table_reference.clone(), + Arc::new(table_source), + projection.cloned(), + filters.to_vec(), + )? + .limit(0, limit)? 
+ .build() } fn create_physical_plan( &self, - projections: Option<&Vec>, - schema: &SchemaRef, - filters: &[Expr], - limit: Option, + projection: Option<&Vec>, + sql: String, ) -> DataFusionResult> { - let mut exec = SqlExec::new( - projections, - schema, - &self.table_reference, + Ok(Arc::new(SqlExec::new( + projection, + &self.schema(), Arc::clone(&self.pool), - filters, - limit, - self.engine, - )?; - if let Some(dialect) = &self.dialect { - exec = exec.with_dialect(Arc::clone(dialect)); - } - Ok(Arc::new(exec)) + sql, + )?)) } - // Return the current memory location of the object as a unique identifier - fn unique_id(&self) -> usize { - std::ptr::from_ref(self) as usize + #[must_use] + pub fn with_dialect(self, dialect: Arc) -> Self { + Self { + dialect: Some(dialect), + ..self + } } #[must_use] - pub fn name(&self) -> &'static str { - self.name + pub fn name(&self) -> &str { + &self.name } #[must_use] pub fn clone_pool(&self) -> Arc + Send + Sync> { Arc::clone(&self.pool) } + + fn dialect(&self) -> &(dyn Dialect + Send + Sync) { + match &self.dialect { + Some(dialect) => dialect.as_ref(), + None => &DefaultDialect {}, + } + } } #[async_trait] @@ -195,10 +193,6 @@ impl TableProvider for SqlTable { Arc::clone(&self.schema) } - fn constraints(&self) -> Option<&Constraints> { - self.constraints.as_ref() - } - fn table_type(&self) -> TableType { TableType::Base } @@ -207,14 +201,7 @@ impl TableProvider for SqlTable { &self, filters: &[&Expr], ) -> DataFusionResult> { - let filter_push_down: Vec = filters - .iter() - .map(|f| match expr::to_sql_with_engine(f, self.engine) { - Ok(_) => TableProviderFilterPushDown::Exact, - Err(_) => TableProviderFilterPushDown::Unsupported, - }) - .collect(); - + let filter_push_down = default_filter_pushdown(filters, self.dialect()); Ok(filter_push_down) } @@ -225,27 +212,37 @@ impl TableProvider for SqlTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - return self.create_physical_plan(projection, &self.schema(), filters, limit); + let sql = self.scan_to_sql(projection, filters, limit)?; + return self.create_physical_plan(projection, sql); } } +pub fn default_filter_pushdown( + filters: &[&Expr], + dialect: &dyn Dialect, +) -> Vec { + filters + .iter() + .map(|f| match Unparser::new(dialect).expr_to_sql(f) { + // The DataFusion unparser currently does not correctly handle unparsing subquery expressions on TableScan filters. + Ok(_) => match expr::expr_contains_subquery(f) { + Ok(true) => TableProviderFilterPushDown::Unsupported, + Ok(false) => TableProviderFilterPushDown::Exact, + Err(_) => TableProviderFilterPushDown::Unsupported, + }, + Err(_) => TableProviderFilterPushDown::Unsupported, + }) + .collect() +} + impl Display for SqlTable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SqlTable {}", self.name) } } -#[derive(Clone)] -pub struct SqlExec { - projected_schema: SchemaRef, - table_reference: TableReference, - pool: Arc + Send + Sync>, - filters: Vec, - limit: Option, - properties: PlanProperties, - engine: Option, - dialect: Option>, -} +static ONE_COLUMN_SCHEMA: LazyLock = + LazyLock::new(|| Arc::new(Schema::new(vec![Field::new("1", DataType::Int64, true)]))); pub fn project_schema_safe( schema: &SchemaRef, @@ -254,7 +251,10 @@ pub fn project_schema_safe( let schema = match projection { Some(columns) => { if columns.is_empty() { - Arc::clone(schema) + // If the projection is Some([]) then it gets unparsed as `SELECT 1`, so return a schema with a single Int64 column. 
+ // + // See: + Arc::clone(&ONE_COLUMN_SCHEMA) } else { Arc::new(schema.project(columns)?) } @@ -264,98 +264,43 @@ pub fn project_schema_safe( Ok(schema) } +#[derive(Clone)] +pub struct SqlExec { + projected_schema: SchemaRef, + pool: Arc + Send + Sync>, + sql: String, + properties: PlanProperties, +} + impl SqlExec { pub fn new( - projections: Option<&Vec>, + projection: Option<&Vec>, schema: &SchemaRef, - table_reference: &TableReference, pool: Arc + Send + Sync>, - filters: &[Expr], - limit: Option, - engine: Option, + sql: String, ) -> DataFusionResult { - let projected_schema = project_schema_safe(schema, projections)?; + let projected_schema = project_schema_safe(schema, projection)?; Ok(Self { projected_schema: Arc::clone(&projected_schema), - table_reference: table_reference.clone(), pool, - filters: filters.to_vec(), - limit, + sql, properties: PlanProperties::new( EquivalenceProperties::new(projected_schema), Partitioning::UnknownPartitioning(1), EmissionType::Incremental, Boundedness::Bounded, ), - engine, - dialect: None, }) } - #[must_use] - pub fn with_dialect(self, dialect: Arc) -> Self { - Self { - dialect: Some(dialect), - ..self - } - } - #[must_use] pub fn clone_pool(&self) -> Arc + Send + Sync> { Arc::clone(&self.pool) } pub fn sql(&self) -> Result { - let columns = self - .projected_schema - .fields() - .iter() - .map(|f| { - // To ensure backwards compatibility, dialect only used when explicitly set - // (i.e. Don't use `DefaultDialect`, don't derive from `self.engine`). - let quote = if let Some(dialect) = &self.dialect { - dialect - .identifier_quote_style(f.name()) - .unwrap_or_default() - .to_string() - } else if matches!(self.engine, Some(Engine::ODBC)) { - String::new() - } else { - '"'.to_string() - }; - format!("{quote}{}{quote}", f.name()) - }) - .collect::>() - .join(", "); - - let limit_expr = match self.limit { - Some(limit) => format!("LIMIT {limit}"), - None => String::new(), - }; - - let where_expr = if self.filters.is_empty() { - String::new() - } else { - let dialect = self.dialect.clone().unwrap_or(self.engine.map_or( - Arc::new(DefaultDialect {}) as Arc, - |e| e.dialect(), - )); - let unparser = Unparser::new(dialect.as_ref()); - - let filter_expr = self - .filters - .iter() - .map(|f| unparser.expr_to_sql(f).map(|s| s.to_string())) - .collect::, DataFusionError>>() - .context(UnableToGenerateSQLDataFusionSnafu)?; - format!("WHERE {}", filter_expr.join(" AND ")) - }; - - Ok(format!( - "SELECT {columns} FROM {table_reference} {where_expr} {limit_expr}", - table_reference = self.table_reference.to_quoted_string() - )) + Ok(self.sql.clone()) } } @@ -456,12 +401,111 @@ mod tests { tracing::dispatcher::set_default(&dispatch) } + mod sql_table_plan_to_sql_tests { + use std::any::Any; + + use async_trait::async_trait; + use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion::sql::unparser::dialect::{Dialect, SqliteDialect}; + use datafusion::{ + logical_expr::{col, lit}, + sql::TableReference, + }; + + use crate::sql::db_connection_pool::{ + dbconnection::DbConnection, DbConnectionPool, JoinPushDown, + }; + + use super::*; + + struct MockConn {} + + impl DbConnection<(), &'static dyn ToString> for MockConn { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + } + + struct MockDBPool {} + + #[async_trait] + impl DbConnectionPool<(), &'static dyn ToString> for MockDBPool { + async fn connect( + &self, + ) -> Result< + Box>, + Box, + > { + Ok(Box::new(MockConn {})) + } + + 
fn join_push_down(&self) -> JoinPushDown { + JoinPushDown::Disallow + } + } + + fn new_sql_table( + table_reference: &'static str, + dialect: Option>, + ) -> Result, Box> { + let fields = vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int16, false), + Field::new( + "createdDate", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new("userId", DataType::LargeUtf8, false), + Field::new("active", DataType::Boolean, false), + Field::new("5e48", DataType::LargeUtf8, false), + ]; + let schema = Arc::new(Schema::new(fields)); + let pool = Arc::new(MockDBPool {}) + as Arc + Send + Sync>; + let table_ref = TableReference::parse_str(table_reference); + + let sql_table = SqlTable::new_with_schema(table_reference, &pool, schema, table_ref); + if let Some(dialect) = dialect { + Ok(sql_table.with_dialect(dialect)) + } else { + Ok(sql_table) + } + } + + #[tokio::test] + async fn test_sql_to_string() -> Result<(), Box> { + let sql_table = new_sql_table("users", Some(Arc::new(SqliteDialect {})))?; + let result = sql_table.scan_to_sql(Some(&vec![0]), &[], None)?; + assert_eq!(result, "SELECT `users`.`name` FROM `users`"); + Ok(()) + } + + #[tokio::test] + async fn test_sql_to_string_with_filters_and_limit( + ) -> Result<(), Box> { + let filters = vec![col("age").gt_eq(lit(30)).and(col("name").eq(lit("x")))]; + let sql_table = new_sql_table("users", Some(Arc::new(SqliteDialect {})))?; + let result = sql_table.scan_to_sql(Some(&vec![0, 1]), &filters, Some(3))?; + assert_eq!( + result, + "SELECT `users`.`name`, `users`.`age` FROM `users` WHERE ((`users`.`age` >= 30) AND (`users`.`name` = 'x')) LIMIT 3" + ); + Ok(()) + } + } + #[test] fn test_references() { let table_ref = TableReference::bare("test"); assert_eq!(format!("{table_ref}"), "test"); } + // XXX move this to duckdb mod?? 
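
    // Illustrative test sketch (assumption, not part of the patch) of the new
    // pushdown behaviour: a plain comparison contains no subquery and unparses
    // cleanly, so it is reported as an exact pushdown, while expressions the
    // unparser rejects fall back to Unsupported.
    #[test]
    fn test_default_filter_pushdown_simple_predicate() {
        use datafusion::logical_expr::{col, lit, TableProviderFilterPushDown};
        use datafusion::sql::unparser::dialect::SqliteDialect;

        let filter = col("age").gt_eq(lit(30));
        let pushdown = super::default_filter_pushdown(&[&filter], &SqliteDialect {});
        assert_eq!(pushdown, vec![TableProviderFilterPushDown::Exact]);
    }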
#[cfg(feature = "duckdb")] mod duckdb_tests { use super::*; @@ -497,7 +541,7 @@ mod tests { db_conn.conn.execute_batch( "CREATE TABLE test (a INTEGER, b VARCHAR); INSERT INTO test VALUES (3, 'bar');", )?; - let duckdb_table = SqlTable::new("duckdb", &pool, "test", None).await?; + let duckdb_table = SqlTable::new("duckdb", &pool, "test").await?; ctx.register_table("test_datafusion", Arc::new(duckdb_table))?; let sql = "SELECT * FROM test_datafusion limit 1"; let df = ctx.sql(sql).await?; @@ -532,7 +576,7 @@ mod tests { db_conn.conn.execute_batch( "CREATE TABLE test (a INTEGER, b VARCHAR); INSERT INTO test VALUES (3, 'bar');", )?; - let duckdb_table = SqlTable::new("duckdb", &pool, "test", None).await?; + let duckdb_table = SqlTable::new("duckdb", &pool, "test").await?; ctx.register_table("test_datafusion", Arc::new(duckdb_table))?; let sql = "SELECT * FROM test_datafusion where a > 1 and b = 'bar' limit 1"; let df = ctx.sql(sql).await?; diff --git a/core/src/sqlite.rs b/core/src/sqlite.rs index b1025fd7..9f63581e 100644 --- a/core/src/sqlite.rs +++ b/core/src/sqlite.rs @@ -49,9 +49,6 @@ pub mod federation; #[cfg(feature = "sqlite-federation")] pub mod sqlite_interval; -#[cfg(feature = "sqlite-federation")] -pub mod between; - pub mod sql_table; pub mod write; @@ -122,7 +119,6 @@ type Result = std::result::Result; #[derive(Debug)] pub struct SqliteTableProviderFactory { instances: Arc>>, - decimal_between: bool, batch_insert_use_prepared_statements: bool, } @@ -136,17 +132,10 @@ impl SqliteTableProviderFactory { pub fn new() -> Self { Self { instances: Arc::new(Mutex::new(HashMap::new())), - decimal_between: false, - batch_insert_use_prepared_statements: true, // Default to true for better performance + batch_insert_use_prepared_statements: false, } } - #[must_use] - pub fn with_decimal_between(mut self, decimal_between: bool) -> Self { - self.decimal_between = decimal_between; - self - } - /// Set whether to use prepared statements for batch inserts. /// /// When enabled (default), uses prepared statements with parameter binding for optimal performance. @@ -381,15 +370,11 @@ impl TableProviderFactory for SqliteTableProviderFactory { let dyn_pool: Arc = read_pool; - let read_provider = Arc::new( - SQLiteTable::new_with_schema( - &dyn_pool, - Arc::clone(&schema), - name, - Some(cmd.constraints.clone()), - ) - .with_decimal_between(self.decimal_between), - ); + let read_provider = Arc::new(SQLiteTable::new_with_schema( + &dyn_pool, + Arc::clone(&schema), + name, + )); let sqlite = Arc::into_inner(sqlite) .context(DanglingReferenceToSqliteSnafu) @@ -409,7 +394,6 @@ impl TableProviderFactory for SqliteTableProviderFactory { pub struct SqliteTableFactory { pool: Arc, - decimal_between: bool, batch_insert_use_prepared_statements: bool, } @@ -418,17 +402,10 @@ impl SqliteTableFactory { pub fn new(pool: Arc) -> Self { Self { pool, - decimal_between: false, batch_insert_use_prepared_statements: false, } } - #[must_use] - pub fn with_decimal_between(mut self, decimal_between: bool) -> Self { - self.decimal_between = decimal_between; - self - } - /// Set whether to use prepared statements for batch inserts. /// /// When enabled (default), uses prepared statements with parameter binding for optimal performance. 
@@ -454,10 +431,11 @@ impl SqliteTableFactory { let dyn_pool: Arc = pool; - let read_provider = Arc::new( - SQLiteTable::new_with_schema(&dyn_pool, Arc::clone(&schema), table_reference, None) - .with_decimal_between(self.decimal_between), - ); + let read_provider = Arc::new(SQLiteTable::new_with_schema( + &dyn_pool, + Arc::clone(&schema), + table_reference, + )); Ok(read_provider) } @@ -1005,6 +983,18 @@ impl Sqlite { params.push(Box::new(array.value(row_idx).to_vec())); } } + DataType::Float16 => { + let array = column.as_any().downcast_ref::().unwrap(); + if array.is_null(row_idx) { + params.push(Box::new(rusqlite::types::Null)); + } else { + // Convert to f32 for storage + params.push(Box::new(array.value(row_idx).to_f32())); + } + } + DataType::Null => { + params.push(Box::new(rusqlite::types::Null)); + } DataType::Decimal128(_, scale) => { let array = column.as_any().downcast_ref::().unwrap(); if array.is_null(row_idx) { @@ -1022,24 +1012,13 @@ impl Sqlite { params.push(Box::new(rusqlite::types::Null)); } else { use bigdecimal::{num_bigint::BigInt, BigDecimal}; - let bigint = - BigInt::from_signed_bytes_le(&array.value(row_idx).to_le_bytes()); - let value = BigDecimal::new(bigint, i64::from(*scale)); - params.push(Box::new(value.to_string())); - } - } - DataType::Float16 => { - let array = column.as_any().downcast_ref::().unwrap(); - if array.is_null(row_idx) { - params.push(Box::new(rusqlite::types::Null)); - } else { - // Convert to f32 for storage - params.push(Box::new(array.value(row_idx).to_f32())); + let value = array.value(row_idx); + let bytes = value.to_be_bytes(); + let big_int = BigInt::from_signed_bytes_be(&bytes); + let decimal = BigDecimal::new(big_int, i64::from(*scale)); + params.push(Box::new(decimal.to_string())); } } - DataType::Null => { - params.push(Box::new(rusqlite::types::Null)); - } DataType::List(_) | DataType::LargeList(_) | DataType::ListView(_) diff --git a/core/src/sqlite/federation.rs b/core/src/sqlite/federation.rs index 02af8be5..ac77e3a5 100644 --- a/core/src/sqlite/federation.rs +++ b/core/src/sqlite/federation.rs @@ -1,10 +1,9 @@ use crate::sql::db_connection_pool::dbconnection::{get_schema, Error as DbError}; use crate::sql::sql_provider_datafusion::{get_stream, to_execution_error}; -use arrow::datatypes::SchemaRef; use async_trait::async_trait; +use datafusion::arrow::datatypes::SchemaRef; use datafusion::sql::sqlparser::ast::{self, VisitMut}; use datafusion::sql::unparser::dialect::Dialect; -use datafusion_federation::sql::ast_analyzer::AstAnalyzerRule; use datafusion_federation::sql::{ ast_analyzer::AstAnalyzer, RemoteTableRef, SQLExecutor, SQLFederationProvider, SQLTableSource, }; @@ -13,7 +12,6 @@ use futures::TryStreamExt; use snafu::ResultExt; use std::sync::Arc; -use super::between::SQLiteBetweenVisitor; use super::sql_table::SQLiteTable; use super::sqlite_interval::SQLiteIntervalVisitor; use datafusion::{ @@ -28,12 +26,12 @@ impl SQLiteTable { fn create_federated_table_source( self: Arc, ) -> DataFusionResult> { - let table_name = self.base_table.table_reference.clone(); + let table_reference = self.base_table.table_reference.clone(); let schema = Arc::clone(&Arc::clone(&self).base_table.schema()); let fed_provider = Arc::new(SQLFederationProvider::new(self)); Ok(Arc::new(SQLTableSource::new_with_schema( fed_provider, - RemoteTableRef::from(table_name), + RemoteTableRef::from(table_reference), schema, ))) } @@ -47,6 +45,7 @@ impl SQLiteTable { self, )) } +} fn sqlite_ast_analyzer(&self) -> AstAnalyzerRule { let decimal_between = 
self.decimal_between; @@ -65,11 +64,9 @@ impl SQLiteTable { let _ = new_query.visit(&mut between_visitor); } - Ok(ast::Statement::Query(new_query)) - } - _ => Ok(ast), - } - }) + Ok(ast::Statement::Query(new_query)) + } + _ => Ok(ast), } } @@ -88,7 +85,8 @@ impl SQLExecutor for SQLiteTable { } fn ast_analyzer(&self) -> Option { - Some(AstAnalyzer::new(vec![self.sqlite_ast_analyzer()])) + let rule = Box::new(sqlite_ast_analyzer); + Some(AstAnalyzer::new(vec![rule])) } fn execute( diff --git a/core/src/sqlite/sql_table.rs b/core/src/sqlite/sql_table.rs index 2d831058..806c9828 100644 --- a/core/src/sqlite/sql_table.rs +++ b/core/src/sqlite/sql_table.rs @@ -1,8 +1,6 @@ use crate::sql::db_connection_pool::DbConnectionPool; -use crate::sql::sql_provider_datafusion::expr::Engine; use async_trait::async_trait; use datafusion::catalog::Session; -use datafusion::common::Constraints; use datafusion::sql::unparser::dialect::SqliteDialect; use futures::TryStreamExt; use std::fmt::Display; @@ -26,14 +24,12 @@ use datafusion::{ pub struct SQLiteTable { pub(crate) base_table: SqlTable, - pub(crate) decimal_between: bool, } impl std::fmt::Debug for SQLiteTable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SQLiteTable") .field("base_table", &self.base_table) - .field("decimal_between", &self.decimal_between) .finish() } } @@ -43,44 +39,24 @@ impl SQLiteTable { pool: &Arc + Send + Sync>, schema: impl Into, table_reference: impl Into, - constraints: Option, ) -> Self { - let base_table = SqlTable::new_with_schema( - "sqlite", - pool, - schema, - table_reference, - Some(Engine::SQLite), - ) - .with_dialect(Arc::new(SqliteDialect {})) - .with_constraints_opt(constraints); - - Self { - base_table, - decimal_between: false, - } - } + let base_table = SqlTable::new_with_schema("sqlite", pool, schema, table_reference) + .with_dialect(Arc::new(SqliteDialect {})); - #[must_use] - pub fn with_decimal_between(mut self, decimal_between: bool) -> Self { - self.decimal_between = decimal_between; - self + Self { base_table } } fn create_physical_plan( &self, - projections: Option<&Vec>, + projection: Option<&Vec>, schema: &SchemaRef, - filters: &[Expr], - limit: Option, + sql: String, ) -> DataFusionResult> { Ok(Arc::new(SQLiteSqlExec::new( - projections, + projection, schema, - &self.base_table.table_reference, self.base_table.clone_pool(), - filters, - limit, + sql, )?)) } } @@ -113,7 +89,8 @@ impl TableProvider for SQLiteTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - return self.create_physical_plan(projection, &self.schema(), filters, limit); + let sql = self.base_table.scan_to_sql(projection, filters, limit)?; + return self.create_physical_plan(projection, &self.schema(), sql); } } @@ -130,22 +107,12 @@ struct SQLiteSqlExec { impl SQLiteSqlExec { fn new( - projections: Option<&Vec>, + projection: Option<&Vec>, schema: &SchemaRef, - table_reference: &TableReference, pool: Arc + Send + Sync>, - filters: &[Expr], - limit: Option, + sql: String, ) -> DataFusionResult { - let base_exec = SqlExec::new( - projections, - schema, - table_reference, - pool, - filters, - limit, - Some(Engine::SQLite), - )?; + let base_exec = SqlExec::new(projection, schema, pool, sql)?; Ok(Self { base_exec }) } diff --git a/core/src/sqlite/sqlite_interval.rs b/core/src/sqlite/sqlite_interval.rs index 948504f1..98dddcf9 100644 --- a/core/src/sqlite/sqlite_interval.rs +++ b/core/src/sqlite/sqlite_interval.rs @@ -1,7 +1,7 @@ use datafusion::error::DataFusionError; use 
datafusion::sql::sqlparser::ast::{ self, BinaryOperator, Expr, FunctionArg, FunctionArgExpr, FunctionArgumentList, Ident, - VisitorMut, + ObjectNamePart, VisitorMut, }; use std::fmt::Display; use std::ops::ControlFlow; @@ -147,7 +147,7 @@ impl SQLiteIntervalVisitor { if let Expr::Interval(interval_expr) = interval { if let Expr::Value(ast::ValueWithSpan { value: ast::Value::SingleQuotedString(value), - .. + span: _, }) = interval_expr.value.as_ref() { return SQLiteIntervalVisitor::parse_interval_string(value); @@ -232,7 +232,7 @@ impl SQLiteIntervalVisitor { .collect(); let datetime_function = Expr::Function(ast::Function { - name: ast::ObjectName(vec![ast::ObjectNamePart::Identifier(Ident::new( + name: ast::ObjectName(vec![ObjectNamePart::Identifier(Ident::new( interval_date_type.to_string(), ))]), args: ast::FunctionArguments::List(FunctionArgumentList { @@ -260,8 +260,8 @@ impl SQLiteIntervalVisitor { if value == 0 { None } else { - Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString(format!("{value:+} {unit}")).into(), + Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString(format!("{value:+} {unit}")), )))) } } @@ -280,8 +280,8 @@ impl SQLiteIntervalVisitor { String::new() }; - Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString(format!("{value:+}{fraction_str} {unit}")).into(), + Some(FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString(format!("{value:+}{fraction_str} {unit}")), )))) } } @@ -289,8 +289,6 @@ impl SQLiteIntervalVisitor { #[cfg(test)] mod test { - use datafusion::sql::sqlparser::ast::ObjectNamePart; - use super::*; #[test] @@ -379,7 +377,7 @@ mod test { #[test] fn test_create_date_function() { - let target = Expr::Value(ast::Value::SingleQuotedString("1995-01-01".to_string()).into()); + let target = Expr::value(ast::Value::SingleQuotedString("1995-01-01".to_string())); let interval = IntervalParts::new() .with_years(1) .with_months(2) @@ -397,17 +395,17 @@ mod test { args: ast::FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("1995-01-01".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("1995-01-01".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+1 years".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+1 years".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+2 months".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+2 months".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+3 days".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+3 days".to_string()), ))), ], clauses: Vec::new(), @@ -429,7 +427,7 @@ mod test { #[test] fn test_create_datetime_function() { - let target = Expr::Value(ast::Value::SingleQuotedString("1995-01-01".to_string()).into()); + let target = Expr::value(ast::Value::SingleQuotedString("1995-01-01".to_string())); let interval = IntervalParts::new() .with_years(0) .with_months(0) @@ -443,23 +441,21 @@ mod test { let expected = 
Expr::Cast { expr: Box::new(Expr::Function(ast::Function { - name: ast::ObjectName(vec![ast::ObjectNamePart::Identifier(Ident::new( - "datetime", - ))]), + name: ast::ObjectName(vec![ObjectNamePart::Identifier(Ident::new("datetime"))]), args: ast::FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("1995-01-01".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("1995-01-01".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+1 hours".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+1 hours".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+2 minutes".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+2 minutes".to_string()), ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - ast::Value::SingleQuotedString("+3 seconds".to_string()).into(), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + ast::Value::SingleQuotedString("+3 seconds".to_string()), ))), ], clauses: Vec::new(), diff --git a/core/src/sqlite/write.rs b/core/src/sqlite/write.rs index 4ca151ab..ddb8d0a2 100644 --- a/core/src/sqlite/write.rs +++ b/core/src/sqlite/write.rs @@ -1,14 +1,12 @@ use std::{any::Any, fmt, sync::Arc}; -use arrow::{array::RecordBatch, datatypes::SchemaRef}; use async_trait::async_trait; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use datafusion::datasource::sink::{DataSink, DataSinkExec}; use datafusion::{ catalog::Session, common::Constraints, - datasource::{ - sink::{DataSink, DataSinkExec}, - TableProvider, TableType, - }, + datasource::{TableProvider, TableType}, error::DataFusionError, execution::{SendableRecordBatchStream, TaskContext}, logical_expr::{dml::InsertOp, Expr}, @@ -18,7 +16,7 @@ use futures::StreamExt; use snafu::prelude::*; use crate::util::{ - constraints::{self, UpsertOptions}, + constraints, on_conflict::OnConflict, retriable_error::{check_and_mark_retriable_error, to_retriable_data_write_error}, }; @@ -68,6 +66,22 @@ impl TableProvider for SqliteTableWriter { Some(self.sqlite.constraints()) } + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> datafusion::error::Result> { + // Verify schema consistency before delegating + // This is a cheap check since it's just comparing Arc pointers + if self.read_provider.schema() != self.schema() { + tracing::warn!( + "Schema mismatch detected in SqliteTableWriter for table {}", + self.sqlite.table_name() + ); + } + + self.read_provider.supports_filters_pushdown(filters) + } + async fn scan( &self, state: &dyn Session, @@ -84,13 +98,13 @@ impl TableProvider for SqliteTableWriter { &self, _state: &dyn Session, input: Arc, - overwrite: InsertOp, + op: InsertOp, ) -> datafusion::error::Result> { Ok(Arc::new(DataSinkExec::new( input, Arc::new(SqliteDataSink::new( Arc::clone(&self.sqlite), - overwrite, + op, self.on_conflict.clone(), self.schema(), self.sqlite.batch_insert_use_prepared_statements, @@ -144,12 +158,6 @@ impl DataSink for SqliteDataSink { let constraints = self.sqlite.constraints().clone(); let mut data = data; - let upsert_options = self - .on_conflict - .as_ref() - .map_or_else(UpsertOptions::default, |conflict| { - conflict.get_upsert_options() 
- }); let task = tokio::spawn(async move { let mut num_rows: u64 = 0; while let Some(data_batch) = data.next().await { @@ -158,20 +166,17 @@ impl DataSink for SqliteDataSink { DataFusionError::Execution(format!("Unable to convert num_rows() to u64: {e}")) })?; - let batches = constraints::validate_batch_with_constraints( - vec![data_batch], + constraints::validate_batch_with_constraints( + std::slice::from_ref(&data_batch), &constraints, - &upsert_options, ) .await .context(super::ConstraintViolationSnafu) .map_err(to_datafusion_error)?; - for batch in batches { - batch_tx.send(batch).await.map_err(|err| { - DataFusionError::Execution(format!("Error sending data batch: {err}")) - })?; - } + batch_tx.send(data_batch).await.map_err(|err| { + DataFusionError::Execution(format!("Error sending data batch: {err}")) + })?; } if notify_commit_transaction.send(()).is_err() { @@ -285,7 +290,7 @@ impl DisplayAs for SqliteDataSink { mod tests { use std::{collections::HashMap, sync::Arc}; - use arrow::{ + use datafusion::arrow::{ array::{Int64Array, RecordBatch, StringArray}, datatypes::{DataType, Schema}, }; @@ -304,8 +309,8 @@ mod tests { #[allow(clippy::unreadable_literal)] async fn test_round_trip_sqlite() { let schema = Arc::new(Schema::new(vec![ - arrow::datatypes::Field::new("time_in_string", DataType::Utf8, false), - arrow::datatypes::Field::new("time_int", DataType::Int64, false), + datafusion::arrow::datatypes::Field::new("time_in_string", DataType::Utf8, false), + datafusion::arrow::datatypes::Field::new("time_int", DataType::Int64, false), ])); let df_schema = ToDFSchema::to_dfschema_ref(Arc::clone(&schema)).expect("df schema"); let external_table = CreateExternalTable { @@ -319,7 +324,7 @@ mod tests { order_exprs: vec![], unbounded: false, options: HashMap::new(), - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::default(), temporary: false, }; @@ -356,7 +361,7 @@ mod tests { async fn test_all_arrow_types_to_sqlite() { use arrow::{ array::*, - datatypes::{i256, DataType, Field, TimeUnit}, + datatypes::{DataType, Field, TimeUnit}, }; let num_rows = 10; @@ -447,9 +452,6 @@ mod tests { DataType::Duration(TimeUnit::Nanosecond), true, ), - // Decimal types - Field::new("col_decimal128", DataType::Decimal128(38, 10), true), - Field::new("col_decimal256", DataType::Decimal256(76, 20), true), ])); let df_schema = ToDFSchema::to_dfschema_ref(Arc::clone(&schema)).expect("df schema"); @@ -464,7 +466,7 @@ mod tests { order_exprs: vec![], unbounded: false, options: HashMap::new(), - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::default(), temporary: false, }; @@ -508,8 +510,6 @@ mod tests { let mut dur_milli_values = Vec::with_capacity(num_rows); let mut dur_micro_values = Vec::with_capacity(num_rows); let mut dur_nano_values = Vec::with_capacity(num_rows); - let mut decimal128_values = Vec::with_capacity(num_rows); - let mut decimal256_values = Vec::with_capacity(num_rows); for i in 0..num_rows { // Add some null values at regular intervals @@ -663,16 +663,6 @@ mod tests { } else { Some(86400000000000 + i as i64) }); - decimal128_values.push(if is_null { - None - } else { - Some(123456789012345 + i as i128) - }); - decimal256_values.push(if is_null { - None - } else { - Some(i256::from_i128(123456789012345 + i as i128)) - }); } let data = RecordBatch::try_new( @@ -741,17 +731,6 @@ mod tests { Arc::new(DurationMillisecondArray::from(dur_milli_values)), 
Arc::new(DurationMicrosecondArray::from(dur_micro_values)), Arc::new(DurationNanosecondArray::from(dur_nano_values)), - // Decimal types - Arc::new( - Decimal128Array::from(decimal128_values) - .with_precision_and_scale(38, 10) - .unwrap(), - ), - Arc::new( - Decimal256Array::from(decimal256_values) - .with_precision_and_scale(76, 20) - .unwrap(), - ), ], ) .expect("data should be created"); @@ -772,4 +751,122 @@ mod tests { ) }); } + + #[tokio::test] + async fn test_filter_pushdown_support() { + use datafusion::logical_expr::{col, lit, TableProviderFilterPushDown}; + + let schema = Arc::new(Schema::new(vec![ + datafusion::arrow::datatypes::Field::new("id", DataType::Int64, false), + datafusion::arrow::datatypes::Field::new("name", DataType::Utf8, false), + ])); + let df_schema = ToDFSchema::to_dfschema_ref(Arc::clone(&schema)).expect("df schema"); + let external_table = CreateExternalTable { + schema: df_schema, + name: TableReference::bare("test_filter_table"), + location: String::new(), + file_type: String::new(), + table_partition_cols: vec![], + if_not_exists: true, + definition: None, + order_exprs: vec![], + unbounded: false, + options: HashMap::new(), + constraints: Constraints::default(), + column_defaults: HashMap::default(), + temporary: false, + }; + let ctx = SessionContext::new(); + let table = SqliteTableProviderFactory::default() + .create(&ctx.state(), &external_table) + .await + .expect("table should be created"); + + // Test that filter pushdown is supported + let filter = col("id").gt(lit(10)); + let result = table + .supports_filters_pushdown(&[&filter]) + .expect("should support filter pushdown"); + + assert_eq!( + result, + vec![TableProviderFilterPushDown::Exact], + "Filter pushdown should be exact for simple comparison" + ); + } + + #[tokio::test] + async fn test_concurrent_read_write_with_filter_pushdown() { + use datafusion::logical_expr::{col, lit, TableProviderFilterPushDown}; + + let schema = Arc::new(Schema::new(vec![ + datafusion::arrow::datatypes::Field::new("id", DataType::Int64, false), + datafusion::arrow::datatypes::Field::new("value", DataType::Int64, false), + ])); + let df_schema = ToDFSchema::to_dfschema_ref(Arc::clone(&schema)).expect("df schema"); + + let external_table = CreateExternalTable { + schema: df_schema, + name: TableReference::bare("concurrent_test"), + location: String::new(), + file_type: String::new(), + table_partition_cols: vec![], + if_not_exists: true, + definition: None, + order_exprs: vec![], + unbounded: false, + options: HashMap::new(), + constraints: Constraints::default(), + column_defaults: HashMap::default(), + temporary: false, + }; + + let ctx = SessionContext::new(); + let table = SqliteTableProviderFactory::default() + .create(&ctx.state(), &external_table) + .await + .expect("table should be created"); + + // Insert initial data + let arr1 = Int64Array::from(vec![1, 2, 3]); + let arr2 = Int64Array::from(vec![10, 20, 30]); + let data = RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(arr1), Arc::new(arr2)]) + .expect("data should be created"); + + let exec = MockExec::new(vec![Ok(data)], Arc::clone(&schema)); + let insertion = table + .insert_into(&ctx.state(), Arc::new(exec), InsertOp::Append) + .await + .expect("insertion should be successful"); + + collect(insertion, ctx.task_ctx()) + .await + .expect("insert successful"); + + // Verify filter pushdown works after insert + let filter = col("id").gt(lit(1)); + let result = table + .supports_filters_pushdown(&[&filter]) + .expect("should support filter 
pushdown"); + + assert_eq!( + result, + vec![TableProviderFilterPushDown::Exact], + "Filter pushdown should be exact for simple comparison" + ); + + // Verify we can actually scan with the filter + let scan = table + .scan(&ctx.state(), None, &[filter], None) + .await + .expect("scan should succeed"); + + let batches = collect(scan, ctx.task_ctx()) + .await + .expect("collect should succeed"); + + assert!(!batches.is_empty(), "Should have results"); + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2, "Should have 2 rows with id > 1"); + } } diff --git a/core/src/util/constraints.rs b/core/src/util/constraints.rs index 600558e9..54f1d778 100644 --- a/core/src/util/constraints.rs +++ b/core/src/util/constraints.rs @@ -1,20 +1,13 @@ -use arrow::{ - array::{Int64Array, RecordBatch}, - datatypes::{DataType, Field, Schema, SchemaRef}, -}; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; use datafusion::{ common::{Constraint, Constraints}, execution::context::SessionContext, - functions_aggregate::{count::count, min_max::max_udaf}, - logical_expr::{ - col, expr::WindowFunctionParams, lit, utils::COUNT_STAR_EXPANSION, Expr, WindowFrame, - WindowFunctionDefinition, - }, + functions_aggregate::count::count, + logical_expr::{col, lit, utils::COUNT_STAR_EXPANSION}, prelude::ident, }; use futures::future; use snafu::prelude::*; -use std::{fmt::Display, sync::Arc}; #[derive(Debug, Snafu)] pub enum Error { @@ -29,139 +22,33 @@ pub enum Error { pub type Result = std::result::Result; -const BATCH_ROW_NUMBER_COLUMN_NAME: &str = "__row_num"; -const MAX_ROW_NUMBER_COLUMN_NAME: &str = "__max_row_num"; - -/// Configuration options for upsert behavior -#[derive(Debug, Clone, Default, PartialEq)] -pub struct UpsertOptions { - /// Remove duplicates after validation to resolve primary key conflicts - pub remove_duplicates: bool, - /// Use "last write wins" behavior - when duplicates are found, keep the row with the highest row number - pub last_write_wins: bool, -} - -impl UpsertOptions { - /// Create a new instance with default settings - pub fn new() -> Self { - Self::default() - } - - pub fn with_remove_duplicates(mut self, remove_duplicates: bool) -> Self { - self.remove_duplicates = remove_duplicates; - self - } - - pub fn with_last_write_wins(mut self, last_write_wins: bool) -> Self { - self.last_write_wins = last_write_wins; - self - } - - pub fn is_default(&self) -> bool { - !self.remove_duplicates && !self.last_write_wins - } -} - -impl Display for UpsertOptions { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut options = Vec::new(); - if self.remove_duplicates { - options.push("remove_duplicates"); - } - if self.last_write_wins { - options.push("last_write_wins"); - } - write!(f, "{}", options.join(",")) - } -} - -impl TryFrom<&str> for UpsertOptions { - type Error = Error; - - fn try_from(value: &str) -> std::result::Result { - let options = value.split(',').map(str::trim).collect::>(); - let mut upsert_options = Self::default(); - - for option in options { - match option { - "remove_duplicates" => upsert_options.remove_duplicates = true, - "last_write_wins" => upsert_options.last_write_wins = true, - "" => {} - _ => { - return Err(Error::DataFusion { - source: datafusion::error::DataFusionError::Plan(format!( - "Unknown upsert option: {}", - option - )), - }); - } - } - } - - Ok(upsert_options) - } -} - /// The goal for this function is to determine if all of the data described in `batches` conforms to the 
constraints described in `constraints`. /// /// It does this by creating a memory table from the record batches and then running a query against the table to validate the constraints. -/// -/// The `options` parameter allows customizing validation behavior such as duplicate removal. -/// -/// Returns the potentially modified batches (e.g., with duplicates removed or last-write-wins applied). pub async fn validate_batch_with_constraints( - batches: Vec, + batches: &[RecordBatch], constraints: &Constraints, - options: &UpsertOptions, -) -> Result> { +) -> Result<()> { if batches.is_empty() || constraints.is_empty() { - return Ok(batches); + return Ok(()); } - // First check if any constraints are violated without attempting to fix them. let mut futures = Vec::new(); for constraint in &**constraints { - let fut = validate_batch_with_constraint_with_options( - batches.clone(), - constraint.clone(), - UpsertOptions::default(), - ); + let fut = validate_batch_with_constraint(batches.to_vec(), constraint.clone()); futures.push(fut); } - match future::try_join_all(futures).await { - Ok(_) => { - // No constraints were violated, just return. - return Ok(batches); - } - Err(e) => { - // Some constraints were violated, if we have the default validation options we need to return an error, otherwise we'll try to fix the batches - if options.is_default() { - return Err(e); - } - } - }; + future::try_join_all(futures).await?; - // Some constraints were violated, but we can potentially fix them - // These need to run sequentially since the batches are modified to fix the constraint violations. - let mut processed_batches = batches; - for constraint in &**constraints { - processed_batches = validate_batch_with_constraint_with_options( - processed_batches, - constraint.clone(), - options.clone(), - ) - .await?; - } - Ok(processed_batches) + Ok(()) } -#[tracing::instrument(level = "debug", skip(batches, options))] -async fn validate_batch_with_constraint_with_options( +#[tracing::instrument(level = "debug", skip(batches))] +async fn validate_batch_with_constraint( batches: Vec, constraint: Constraint, - options: UpsertOptions, -) -> Result> { +) -> Result<()> { let unique_cols = match constraint { Constraint::PrimaryKey(cols) | Constraint::Unique(cols) => cols, }; @@ -172,29 +59,8 @@ async fn validate_batch_with_constraint_with_options( .map(|col| schema.field(*col)) .collect::>(); - // Prepare data with row numbers if last write wins is enabled - let batches_with_row_nums = if options.last_write_wins { - add_row_numbers_to_batches(batches)? 
- } else { - batches - }; - let ctx = SessionContext::new(); - let mut df = ctx - .read_batches(batches_with_row_nums) - .context(DataFusionSnafu)?; - - // Apply last write wins logic - keep only the row with the highest row number for each unique key - if options.last_write_wins { - df = apply_last_write_wins(&mut df, &unique_fields) - .await - .context(DataFusionSnafu)?; - } - - // Remove duplicates first if requested to resolve primary key conflicts for duplicate rows - if options.remove_duplicates { - df = df.distinct().context(DataFusionSnafu)?; - } + let df = ctx.read_batches(batches).context(DataFusionSnafu)?; let count_name = count(lit(COUNT_STAR_EXPANSION)).schema_name().to_string(); @@ -203,7 +69,6 @@ async fn validate_batch_with_constraint_with_options( // SELECT COUNT(1), FROM mem_table GROUP BY HAVING COUNT(1) > 1 // ``` let num_rows = df - .clone() .aggregate( unique_fields.iter().map(|f| ident(f.name())).collect(), vec![count(lit(COUNT_STAR_EXPANSION))], @@ -225,90 +90,7 @@ async fn validate_batch_with_constraint_with_options( .fail()?; } - // Return the processed batches - let final_batches = df.collect().await.context(DataFusionSnafu)?; - Ok(final_batches) -} - -/// Add row numbers to batches for tracking insertion order in last write wins logic -fn add_row_numbers_to_batches(batches: Vec) -> Result> { - let mut batches_with_row_nums = Vec::new(); - let mut global_row_number = 0i64; - - for batch in batches { - let num_rows = batch.num_rows(); - let row_numbers: Vec = - (global_row_number..global_row_number + num_rows as i64).collect(); - global_row_number += num_rows as i64; - - // Create new schema with additional row_num column - let mut fields = batch.schema().fields().iter().cloned().collect::>(); - fields.push(Arc::new(Field::new( - BATCH_ROW_NUMBER_COLUMN_NAME, - DataType::Int64, - false, - ))); - let new_schema = Arc::new(Schema::new(fields)); - - // Create new columns including row_num - let mut columns = batch.columns().to_vec(); - columns.push(Arc::new(Int64Array::from(row_numbers)) as arrow::array::ArrayRef); - - let new_batch = - RecordBatch::try_new(new_schema, columns).map_err(|e| Error::DataFusion { - source: datafusion::error::DataFusionError::ArrowError(Box::new(e), None), - })?; - batches_with_row_nums.push(new_batch); - } - - Ok(batches_with_row_nums) -} - -/// Apply last write wins logic using window functions to keep only the row with highest row number for each unique key -async fn apply_last_write_wins( - df: &mut datafusion::dataframe::DataFrame, - unique_fields: &[&arrow::datatypes::Field], -) -> datafusion::error::Result { - // Create partition by expressions for the unique fields - let partition_by: Vec = unique_fields - .iter() - .map(|field| col(field.name())) - .collect(); - - // Create a MAX window function to get the maximum row number for each partition - let max_row_num_expr = - Expr::WindowFunction(Box::new(datafusion::logical_expr::expr::WindowFunction { - fun: WindowFunctionDefinition::AggregateUDF(max_udaf()), - params: WindowFunctionParams { - args: vec![col(BATCH_ROW_NUMBER_COLUMN_NAME)], - partition_by, - order_by: vec![col(BATCH_ROW_NUMBER_COLUMN_NAME).sort(false, true)], // Order by row_num DESC - window_frame: WindowFrame::new(Some(false)), - null_treatment: None, - }, - })); - - // Add the maximum row number as a column with explicit alias - let df_with_max = df - .clone() - .with_column(MAX_ROW_NUMBER_COLUMN_NAME, max_row_num_expr)?; - - // Filter to keep only rows where __row_num equals the max row number for its partition - 
let filtered_df = df_with_max - .filter(col(BATCH_ROW_NUMBER_COLUMN_NAME).eq(col(MAX_ROW_NUMBER_COLUMN_NAME)))?; - - // Get all original columns from the dataframe (excluding __row_num) - let df_schema = df.schema(); - let original_column_exprs: Vec = df_schema - .fields() - .iter() - .filter(|field| field.name() != BATCH_ROW_NUMBER_COLUMN_NAME) - .map(|field| col(field.name())) - .collect(); - - let final_df = filtered_df.select(original_column_exprs)?; - - Ok(final_df) + Ok(()) } #[must_use] @@ -331,13 +113,9 @@ pub fn get_primary_keys_from_constraints( #[cfg(test)] pub(crate) mod tests { - use super::*; use std::sync::Arc; - use arrow::{ - array::{ArrayRef, Int32Array, RecordBatch, StringArray}, - datatypes::{DataType, Field, Schema, SchemaRef}, - }; + use datafusion::arrow::datatypes::SchemaRef; use datafusion::{ common::{Constraint, Constraints}, parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, @@ -352,7 +130,8 @@ pub(crate) mod tests { let parquet_reader = ParquetRecordBatchReaderBuilder::try_new(parquet_bytes)?.build()?; - let records = parquet_reader.collect::, arrow::error::ArrowError>>()?; + let records = + parquet_reader.collect::, datafusion::arrow::error::ArrowError>>()?; let schema = records[0].schema(); let constraints = get_unique_constraints( @@ -360,12 +139,7 @@ pub(crate) mod tests { Arc::clone(&schema), ); - let result = validate_batch_with_constraints( - records.clone(), - &constraints, - &UpsertOptions::default(), - ) - .await; + let result = super::validate_batch_with_constraints(&records, &constraints).await; assert!( result.is_ok(), "{}", @@ -373,12 +147,7 @@ pub(crate) mod tests { ); let invalid_constraints = get_unique_constraints(&["VendorID"], Arc::clone(&schema)); - let result = validate_batch_with_constraints( - records.clone(), - &invalid_constraints, - &UpsertOptions::default(), - ) - .await; + let result = super::validate_batch_with_constraints(&records, &invalid_constraints).await; assert!(result.is_err()); assert_eq!( result.expect_err("this returned an error").to_string(), @@ -387,12 +156,7 @@ pub(crate) mod tests { let invalid_constraints = get_unique_constraints(&["VendorID", "tpep_pickup_datetime"], Arc::clone(&schema)); - let result = validate_batch_with_constraints( - records, - &invalid_constraints, - &UpsertOptions::default(), - ) - .await; + let result = super::validate_batch_with_constraints(&records, &invalid_constraints).await; assert!(result.is_err()); assert_eq!( result.expect_err("this returned an error").to_string(), @@ -421,908 +185,4 @@ pub(crate) mod tests { Constraints::new_unverified(vec![Constraint::PrimaryKey(indices)]) } - - /// Builder for creating test RecordBatches with specific data patterns - #[derive(Debug)] - pub struct TestDataBuilder { - schema: SchemaRef, - batches: Vec, - } - - impl TestDataBuilder { - pub fn new(schema: SchemaRef) -> Self { - Self { - schema, - batches: Vec::new(), - } - } - - pub fn with_columns(columns: &[(&str, DataType)]) -> Self { - let fields: Vec = columns - .iter() - .map(|(name, data_type)| Field::new(*name, data_type.clone(), false)) - .collect(); - let schema = Arc::new(Schema::new(fields)); - Self::new(schema) - } - - pub fn add_string_batch( - self, - data: Vec>>, - ) -> Result { - let columns: Result, arrow::error::ArrowError> = - (0..self.schema.fields().len()) - .map(|col_idx| { - let column_data: Vec> = data - .iter() - .map(|row| row.get(col_idx).and_then(|v| v.map(|s| s.to_string()))) - .collect(); - Ok(Arc::new(StringArray::from(column_data)) as ArrayRef) - }) - 
.collect(); - - let batch = RecordBatch::try_new(self.schema.clone(), columns?)?; - let mut new_self = self; - new_self.batches.push(batch); - Ok(new_self) - } - - pub fn add_int_batch( - self, - data: Vec>>, - ) -> Result { - let columns: Result, arrow::error::ArrowError> = - (0..self.schema.fields().len()) - .map(|col_idx| { - let column_data: Vec> = data - .iter() - .map(|row| row.get(col_idx).copied().flatten()) - .collect(); - Ok(Arc::new(Int32Array::from(column_data)) as ArrayRef) - }) - .collect(); - - let batch = RecordBatch::try_new(self.schema.clone(), columns?)?; - let mut new_self = self; - new_self.batches.push(batch); - Ok(new_self) - } - - pub fn build(self) -> Vec { - self.batches - } - } - - /// Builder for creating different types of constraints - #[derive(Debug)] - pub struct ConstraintBuilder { - schema: SchemaRef, - } - - impl ConstraintBuilder { - pub fn new(schema: SchemaRef) -> Self { - Self { schema } - } - - /// Create a unique constraint on the specified columns - pub fn unique_on(&self, column_names: &[&str]) -> Constraints { - get_unique_constraints(column_names, self.schema.clone()) - } - - /// Create a primary key constraint on the specified columns - pub fn primary_key_on(&self, column_names: &[&str]) -> Constraints { - get_pk_constraints(column_names, self.schema.clone()) - } - } - - pub enum Expect { - Pass, - Fail, - } - - /// Helper for testing constraint validation scenarios - pub struct ConstraintTestCase { - pub name: String, - pub batches: Vec, - pub constraints: Constraints, - pub should_pass: Expect, - pub expected_error_contains: Option, - } - - impl ConstraintTestCase { - /// Create a new test case - pub fn new( - name: &str, - batches: Vec, - constraints: Constraints, - should_pass: Expect, - ) -> Self { - Self { - name: name.to_string(), - batches, - constraints, - should_pass, - expected_error_contains: None, - } - } - - /// Set expected error message substring - pub fn with_expected_error(mut self, error_substr: &str) -> Self { - self.expected_error_contains = Some(error_substr.to_string()); - self - } - - /// Run the test case - pub async fn run(self) -> Result<(), anyhow::Error> { - let result = validate_batch_with_constraints( - self.batches.clone(), - &self.constraints, - &UpsertOptions::default(), - ) - .await; - - match (self.should_pass, result) { - (Expect::Pass, Ok(_)) => { - println!("✓ Test '{}' passed as expected", self.name); - Ok(()) - } - (Expect::Fail, Err(err)) => { - if let Some(expected_substr) = &self.expected_error_contains { - let err_str = err.to_string(); - if err_str.contains(expected_substr) { - println!( - "✓ Test '{}' failed as expected with error: {}", - self.name, err_str - ); - Ok(()) - } else { - Err(anyhow::anyhow!( - "Test '{}' failed with unexpected error. 
Expected substring '{}', got: {}", - self.name, expected_substr, err_str - )) - } - } else { - println!( - "✓ Test '{}' failed as expected with error: {}", - self.name, err - ); - Ok(()) - } - } - (Expect::Pass, Err(err)) => Err(anyhow::anyhow!( - "Test '{}' was expected to pass but failed with error: {}", - self.name, - err - )), - (Expect::Fail, Ok(_)) => Err(anyhow::anyhow!( - "Test '{}' was expected to fail but passed", - self.name - )), - } - } - } - - #[tokio::test] - async fn test_valid_unique_constraint_no_duplicates() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("id", DataType::Utf8), - ("name", DataType::Utf8), - ("category", DataType::Utf8), - ]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("Alice"), Some("A")], - vec![Some("2"), Some("Bob"), Some("B")], - vec![Some("3"), Some("Charlie"), Some("A")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - ConstraintTestCase::new( - "valid unique constraint on id", - batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_invalid_unique_constraint_duplicate_values() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("name", DataType::Utf8)]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("Alice")], - vec![Some("1"), Some("Bob")], - vec![Some("2"), Some("Charlie")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - ConstraintTestCase::new( - "invalid unique constraint - duplicate ids", - batches, - constraints, - Expect::Fail, - ) - .with_expected_error("violates uniqueness constraint on column(s): id") - .run() - .await - } - - #[tokio::test] - async fn test_valid_composite_unique_constraint() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("user_id", DataType::Utf8), - ("product_id", DataType::Utf8), - ("rating", DataType::Utf8), - ]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("A"), Some("5")], - vec![Some("1"), Some("B"), Some("4")], - vec![Some("2"), Some("A"), Some("3")], - vec![Some("2"), Some("B"), Some("5")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["user_id", "product_id"]); - - ConstraintTestCase::new( - "valid composite unique constraint", - batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_invalid_composite_unique_constraint() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("user_id", DataType::Utf8), - ("product_id", DataType::Utf8), - ]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("A")], - vec![Some("1"), Some("B")], - vec![Some("1"), Some("A")], - ])? 
- .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["user_id", "product_id"]); - - ConstraintTestCase::new( - "invalid composite unique constraint", - batches, - constraints, - Expect::Fail, - ) - .with_expected_error("violates uniqueness constraint on column(s): user_id, product_id") - .run() - .await - } - - #[tokio::test] - async fn test_valid_primary_key_constraint() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("pk", DataType::Utf8), ("data", DataType::Utf8)]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("pk1"), Some("data1")], - vec![Some("pk2"), Some("data2")], - vec![Some("pk3"), Some("data3")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.primary_key_on(&["pk"]); - - ConstraintTestCase::new( - "valid primary key constraint", - batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_invalid_primary_key_constraint() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("pk", DataType::Utf8), ("data", DataType::Utf8)]); - - let batches = data_builder - .add_string_batch(vec![ - vec![Some("pk1"), Some("data1")], - vec![Some("pk1"), Some("data2")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.primary_key_on(&["pk"]); - - ConstraintTestCase::new( - "invalid primary key constraint", - batches, - constraints, - Expect::Fail, - ) - .with_expected_error("violates uniqueness constraint on column(s): pk") - .run() - .await - } - - #[tokio::test] - async fn test_multiple_batches_with_valid_constraints() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("value", DataType::Utf8)]); - - let batches = data_builder - .add_string_batch(vec![vec![Some("1"), Some("A")], vec![Some("2"), Some("B")]])? - .add_string_batch(vec![vec![Some("3"), Some("C")], vec![Some("4"), Some("D")]])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - ConstraintTestCase::new( - "multiple batches with valid constraints", - batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_multiple_batches_with_cross_batch_violations() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("value", DataType::Utf8)]); - - let batches = data_builder - .add_string_batch(vec![vec![Some("1"), Some("A")], vec![Some("2"), Some("B")]])? - .add_string_batch(vec![vec![Some("1"), Some("C")], vec![Some("4"), Some("D")]])? 
- .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - ConstraintTestCase::new( - "multiple batches with cross-batch violations", - batches, - constraints, - Expect::Fail, - ) - .with_expected_error("violates uniqueness constraint on column(s): id") - .run() - .await - } - - #[tokio::test] - async fn test_integer_data_with_valid_constraints() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("int_id", DataType::Int32), - ("value", DataType::Int32), - ]); - - let batches = data_builder - .add_int_batch(vec![ - vec![Some(1), Some(100)], - vec![Some(2), Some(200)], - vec![Some(3), Some(300)], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["int_id"]); - - ConstraintTestCase::new( - "integer data with valid constraints", - batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_empty_batch_handling() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[("id", DataType::Utf8)]); - let empty_batches = data_builder.build(); - - let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); - let constraint_builder = ConstraintBuilder::new(schema); - let constraints = constraint_builder.unique_on(&["id"]); - - ConstraintTestCase::new( - "empty batches should pass", - empty_batches, - constraints, - Expect::Pass, - ) - .run() - .await - } - - #[tokio::test] - async fn test_duplicate_removal_resolves_primary_key_conflicts() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("name", DataType::Utf8)]); - - // Create batches with duplicate rows that would violate primary key constraint - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("Alice")], - vec![Some("1"), Some("Alice")], // Exact duplicate - vec![Some("2"), Some("Bob")], - vec![Some("2"), Some("Bob")], // Another exact duplicate - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.primary_key_on(&["id"]); - - // Without duplicate removal, this should fail - let result = validate_batch_with_constraints( - batches.clone(), - &constraints, - &UpsertOptions::default(), - ) - .await; - assert!( - result.is_err(), - "Expected validation to fail with duplicates" - ); - - // With duplicate removal, this should pass - let options = UpsertOptions::new().with_remove_duplicates(true); - let result = validate_batch_with_constraints(batches, &constraints, &options).await; - assert!( - result.is_ok(), - "Expected validation to pass after removing duplicates: {:?}", - result - ); - - Ok(()) - } - - #[tokio::test] - async fn test_duplicate_removal_with_partial_duplicates() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("id", DataType::Utf8), - ("name", DataType::Utf8), - ("category", DataType::Utf8), - ]); - - // Create batches where some rows are duplicates and others are unique - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("Alice"), Some("A")], - vec![Some("1"), Some("Alice"), Some("A")], // Exact duplicate - vec![Some("2"), Some("Bob"), Some("B")], // Unique - vec![Some("3"), Some("Charlie"), Some("C")], // Unique - ])? 
- .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - // With duplicate removal, this should pass - let options = UpsertOptions::new().with_remove_duplicates(true); - let result = validate_batch_with_constraints(batches, &constraints, &options).await; - assert!( - result.is_ok(), - "Expected validation to pass after removing duplicates: {:?}", - result - ); - - Ok(()) - } - - #[tokio::test] - async fn test_duplicate_removal_across_multiple_batches() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("value", DataType::Utf8)]); - - // Create multiple batches where duplicates occur across batches - let batches = data_builder - .add_string_batch(vec![vec![Some("1"), Some("A")], vec![Some("2"), Some("B")]])? - .add_string_batch(vec![ - vec![Some("1"), Some("A")], // Duplicate from first batch - vec![Some("3"), Some("C")], - ])? - .add_string_batch(vec![ - vec![Some("2"), Some("B")], // Duplicate from first batch - vec![Some("4"), Some("D")], - ])? - .build(); - - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - // Without duplicate removal, this should fail due to cross-batch duplicates - let result = validate_batch_with_constraints( - batches.clone(), - &constraints, - &UpsertOptions::default(), - ) - .await; - assert!( - result.is_err(), - "Expected validation to fail with cross-batch duplicates" - ); - - // With duplicate removal, this should pass - let options = UpsertOptions::new().with_remove_duplicates(true); - let result = validate_batch_with_constraints(batches, &constraints, &options).await; - assert!( - result.is_ok(), - "Expected validation to pass after removing cross-batch duplicates: {:?}", - result - ); - - Ok(()) - } - - #[tokio::test] - async fn test_last_write_wins_basic_behavior() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("value", DataType::Utf8)]); - - // Create batches with duplicate keys but different values - last one should win - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("first")], - vec![Some("2"), Some("A")], - ])? - .add_string_batch(vec![ - vec![Some("1"), Some("second")], // Should override first value - vec![Some("3"), Some("B")], - ])? - .add_string_batch(vec![ - vec![Some("1"), Some("third")], // Should be the final value - vec![Some("4"), Some("C")], - ])? 
- .build(); - - let original_schema = batches[0].schema(); - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - // Without last write wins, this should fail due to duplicate keys - let result = validate_batch_with_constraints( - batches.clone(), - &constraints, - &UpsertOptions::default(), - ) - .await; - assert!( - result.is_err(), - "Expected validation to fail with duplicate keys" - ); - - // With last write wins, this should pass and keep the last value - let options = UpsertOptions::new().with_last_write_wins(true); - let result_batches = - validate_batch_with_constraints(batches, &constraints, &options).await?; - - // Verify schema matches - assert_eq!( - result_batches[0].schema(), - original_schema, - "Output schema should match input schema" - ); - - // Verify expected row count (should be 4: one for each unique id) - let total_rows: usize = result_batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 4, "Expected 4 rows after deduplication"); - - // Use DataFusion to verify the data - let ctx = SessionContext::new(); - let df = ctx.read_batches(result_batches)?; - - // Verify that id "1" has value "third" - let result = df - .clone() - .filter(col("id").eq(lit("1")))? - .select(vec![col("value")])? - .collect() - .await?; - - assert_eq!(result.len(), 1, "Should have exactly one batch"); - assert_eq!( - result[0].num_rows(), - 1, - "Should have exactly one row for id '1'" - ); - - let value_array = result[0] - .column(0) - .as_any() - .downcast_ref::() - .expect("Failed to cast value column"); - assert_eq!( - value_array.value(0), - "third", - "Expected last value 'third' for id '1'" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_last_write_wins_within_single_batch() -> Result<(), anyhow::Error> { - let data_builder = - TestDataBuilder::with_columns(&[("id", DataType::Utf8), ("name", DataType::Utf8)]); - - // Create batch with duplicates within the same batch - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("Alice")], - vec![Some("2"), Some("Bob")], - vec![Some("1"), Some("Alice_Updated")], // Should win over first Alice - vec![Some("3"), Some("Charlie")], - ])? - .build(); - - let original_schema = batches[0].schema(); - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["id"]); - - // With last write wins, this should pass and keep the updated name - let options = UpsertOptions::new().with_last_write_wins(true); - let result_batches = - validate_batch_with_constraints(batches, &constraints, &options).await?; - - // Verify schema matches - assert_eq!( - result_batches[0].schema(), - original_schema, - "Output schema should match input schema" - ); - - // Verify expected row count (should be 3: one for each unique id) - let total_rows: usize = result_batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 3, "Expected 3 rows after deduplication"); - - // Use DataFusion to verify the data - let ctx = SessionContext::new(); - let df = ctx.read_batches(result_batches)?; - - // Verify that id "1" has the updated name - let result = df - .clone() - .filter(col("id").eq(lit("1")))? - .select(vec![col("name")])? 
- .collect() - .await?; - - assert_eq!(result.len(), 1, "Should have exactly one batch"); - assert_eq!( - result[0].num_rows(), - 1, - "Should have exactly one row for id '1'" - ); - - let name_array = result[0] - .column(0) - .as_any() - .downcast_ref::() - .expect("Failed to cast name column"); - assert_eq!( - name_array.value(0), - "Alice_Updated", - "Expected updated name 'Alice_Updated' for id '1'" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_last_write_wins_with_composite_keys() -> Result<(), anyhow::Error> { - let data_builder = TestDataBuilder::with_columns(&[ - ("user_id", DataType::Utf8), - ("product_id", DataType::Utf8), - ("rating", DataType::Utf8), - ]); - - // Create data with composite key conflicts - let batches = data_builder - .add_string_batch(vec![ - vec![Some("1"), Some("A"), Some("3")], // First rating for user 1, product A - vec![Some("2"), Some("B"), Some("4")], - ])? - .add_string_batch(vec![ - vec![Some("1"), Some("A"), Some("5")], // Updated rating (should win) - vec![Some("3"), Some("C"), Some("2")], - ])? - .build(); - - let original_schema = batches[0].schema(); - let constraint_builder = ConstraintBuilder::new(batches[0].schema()); - let constraints = constraint_builder.unique_on(&["user_id", "product_id"]); - - // Without last write wins, this should fail - let result = validate_batch_with_constraints( - batches.clone(), - &constraints, - &UpsertOptions::default(), - ) - .await; - assert!( - result.is_err(), - "Expected validation to fail with composite key duplicates" - ); - - // With last write wins, this should pass and keep the updated rating - let options = UpsertOptions::new().with_last_write_wins(true); - let result_batches = - validate_batch_with_constraints(batches, &constraints, &options).await?; - - // Verify schema matches - assert_eq!( - result_batches[0].schema(), - original_schema, - "Output schema should match input schema" - ); - - // Verify expected row count (should be 3: one for each unique combination) - let total_rows: usize = result_batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 3, "Expected 3 rows after deduplication"); - - // Use DataFusion to verify the data - let ctx = SessionContext::new(); - let df = ctx.read_batches(result_batches)?; - - // Verify that user "1", product "A" has rating "5" - let result = df - .clone() - .filter( - col("user_id") - .eq(lit("1")) - .and(col("product_id").eq(lit("A"))), - )? - .select(vec![col("rating")])? 
- .collect() - .await?; - - assert_eq!(result.len(), 1, "Should have exactly one batch"); - assert_eq!( - result[0].num_rows(), - 1, - "Should have exactly one row for user '1', product 'A'" - ); - - let rating_array = result[0] - .column(0) - .as_any() - .downcast_ref::() - .expect("Failed to cast rating column"); - assert_eq!( - rating_array.value(0), - "5", - "Expected updated rating '5' for user '1', product 'A'" - ); - - Ok(()) - } - - #[test] - fn test_upsert_options_try_from_str_empty() { - let result = UpsertOptions::try_from(""); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(!options.remove_duplicates); - assert!(!options.last_write_wins); - assert!(options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_remove_duplicates() { - let result = UpsertOptions::try_from("remove_duplicates"); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(options.remove_duplicates); - assert!(!options.last_write_wins); - assert!(!options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_last_write_wins() { - let result = UpsertOptions::try_from("last_write_wins"); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(!options.remove_duplicates); - assert!(options.last_write_wins); - assert!(!options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_both_options() { - let result = UpsertOptions::try_from("remove_duplicates,last_write_wins"); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(options.remove_duplicates); - assert!(options.last_write_wins); - assert!(!options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_both_options_reverse_order() { - let result = UpsertOptions::try_from("last_write_wins,remove_duplicates"); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(options.remove_duplicates); - assert!(options.last_write_wins); - assert!(!options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_with_spaces() { - let result = UpsertOptions::try_from(" remove_duplicates , last_write_wins "); - assert!(result.is_ok()); - let options = result.unwrap(); - assert!(options.remove_duplicates); - assert!(options.last_write_wins); - assert!(!options.is_default()); - } - - #[test] - fn test_upsert_options_try_from_str_invalid_option() { - let result = UpsertOptions::try_from("invalid_option"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Unknown upsert option: invalid_option")); - } - - #[test] - fn test_upsert_options_try_from_str_mixed_valid_invalid() { - let result = UpsertOptions::try_from("remove_duplicates,invalid_option"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Unknown upsert option: invalid_option")); - } - - #[test] - fn test_upsert_options_try_from_str_multiple_invalid_options() { - let result = UpsertOptions::try_from("invalid1,invalid2"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Unknown upsert option: invalid1")); - } - - #[test] - fn test_upsert_options_try_from_str_case_sensitive() { - let result = UpsertOptions::try_from("Remove_Duplicates"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Unknown upsert option: Remove_Duplicates")); - } } diff --git a/core/src/util/mod.rs b/core/src/util/mod.rs index 48a5d412..4f41f042 100644 --- 
a/core/src/util/mod.rs +++ b/core/src/util/mod.rs @@ -1,12 +1,10 @@ -use datafusion::error::Result as DataFusionResult; -use datafusion::logical_expr::Expr; -use datafusion::sql::unparser::dialect::DefaultDialect; -use datafusion::sql::unparser::Unparser; use snafu::prelude::*; use std::hash::Hash; -use std::{collections::HashMap, sync::Arc}; -use crate::{sql::sql_provider_datafusion::expr::Engine, UnsupportedTypeAction}; +use datafusion::common::DataFusionError; +use std::collections::HashMap; + +use crate::UnsupportedTypeAction; pub mod column_reference; pub mod constraints; @@ -14,6 +12,7 @@ pub mod indexes; pub mod ns_lookup; pub mod on_conflict; pub mod retriable_error; +pub mod table_arg_replace; #[cfg(any(feature = "sqlite", feature = "duckdb", feature = "postgres"))] pub mod schema; @@ -28,22 +27,6 @@ pub enum Error { }, } -pub fn filters_to_sql(filters: &[Expr], engine: Option<Engine>) -> Result<String> { - let dialect = engine - .map(|e| e.dialect()) - .unwrap_or(Arc::new(DefaultDialect {})); - Ok(filters - .iter() - .map(|f| { - Unparser::new(dialect.as_ref()) - .expr_to_sql(f) - .map(|e| e.to_string()) - }) - .collect::<DataFusionResult<Vec<_>>>() - .context(UnableToGenerateSQLSnafu)? - .join(" AND ")) -} - #[must_use] pub fn hashmap_from_option_string<K, V>(hashmap_option_str: &str) -> HashMap<K, V> where @@ -79,6 +62,14 @@ pub fn remove_prefix_from_hashmap_keys( .collect() } +#[must_use] +pub fn to_datafusion_error<E>(error: E) -> DataFusionError +where + E: std::error::Error + Send + Sync + 'static, +{ + DataFusionError::External(Box::new(error)) +} + /// If the `UnsupportedTypeAction` is `Error` or `String`, the function will return an error. /// If the `UnsupportedTypeAction` is `Warn`, the function will log a warning. /// If the `UnsupportedTypeAction` is `Ignore`, the function will do nothing. 
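// Illustrative sketch (not part of the patch): the `to_datafusion_error` helper added
// above wraps any `std::error::Error` into `DataFusionError::External`, so connector
// errors can be bubbled through DataFusion APIs without a bespoke conversion at each
// call site. The helper is reproduced locally so the sketch stands alone; `PoolError`
// is a made-up error type used only for illustration.
use datafusion::common::DataFusionError;

#[derive(Debug)]
struct PoolError(String);

impl std::fmt::Display for PoolError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "connection pool error: {}", self.0)
    }
}

impl std::error::Error for PoolError {}

fn to_datafusion_error<E>(error: E) -> DataFusionError
where
    E: std::error::Error + Send + Sync + 'static,
{
    DataFusionError::External(Box::new(error))
}

fn acquire_connection() -> Result<(), DataFusionError> {
    // Any error type that is `std::error::Error + Send + Sync + 'static` can be wrapped.
    Err(to_datafusion_error(PoolError("sqlite file is locked".into())))
}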
diff --git a/core/src/util/on_conflict.rs b/core/src/util/on_conflict.rs index 9d4515a3..bb82e6fa 100644 --- a/core/src/util/on_conflict.rs +++ b/core/src/util/on_conflict.rs @@ -1,11 +1,9 @@ -use arrow::datatypes::SchemaRef; +use datafusion::arrow::datatypes::SchemaRef; use itertools::Itertools; use sea_query::{self, Alias}; use snafu::prelude::*; use std::fmt::Display; -use crate::util::constraints::{self, UpsertOptions}; - use super::column_reference::{self, ColumnReference}; #[derive(Debug, Snafu)] @@ -18,16 +16,13 @@ pub enum Error { #[snafu(display("Expected semicolon in: {token}"))] ExpectedSemicolon { token: String }, - - #[snafu(display("Invalid upsert options: {source}"))] - InvalidUpsertOptions { source: constraints::Error }, } #[derive(Debug, Clone, PartialEq)] pub enum OnConflict { DoNothingAll, DoNothing(ColumnReference), - Upsert(ColumnReference, UpsertOptions), + Upsert(ColumnReference), } impl OnConflict { @@ -41,7 +36,7 @@ impl OnConflict { column.iter().join(r#"", ""#) ) } - OnConflict::Upsert(column, _) => { + OnConflict::Upsert(column) => { let non_constraint_columns = schema .fields() .iter() @@ -85,7 +80,7 @@ impl OnConflict { on_conflict.do_nothing(); on_conflict } - OnConflict::Upsert(column, _) => { + OnConflict::Upsert(column) => { let mut on_conflict = sea_query::OnConflict::columns::, Alias>( column.iter().map(Alias::new).collect(), ); @@ -103,13 +98,6 @@ impl OnConflict { } } } - - pub fn get_upsert_options(&self) -> UpsertOptions { - match self { - OnConflict::Upsert(_, options) => options.clone(), - _ => UpsertOptions::default(), - } - } } impl Display for OnConflict { @@ -117,7 +105,7 @@ impl Display for OnConflict { match self { OnConflict::DoNothingAll => write!(f, "do_nothing_all"), OnConflict::DoNothing(column) => write!(f, "do_nothing:{column}"), - OnConflict::Upsert(column, options) => write!(f, "upsert:{column}#{options}"), + OnConflict::Upsert(column) => write!(f, "upsert:{column}"), } } } @@ -138,25 +126,13 @@ impl TryFrom<&str> for OnConflict { .fail(); } - let upsert_parts: Vec<&str> = parts[1].split('#').collect(); - let column_ref = - ColumnReference::try_from(upsert_parts[0]).context(InvalidColumnReferenceSnafu)?; - - let upsert_options = if parts[0] == "upsert" { - if upsert_parts.len() == 2 { - UpsertOptions::try_from(upsert_parts[1]).context(InvalidUpsertOptionsSnafu)? 
- } else { - UpsertOptions::default() - } - } else { - UpsertOptions::default() - }; + ColumnReference::try_from(parts[1]).context(InvalidColumnReferenceSnafu)?; let on_conflict_behavior = parts[0]; match on_conflict_behavior { "do_nothing" => Ok(OnConflict::DoNothing(column_ref)), - "upsert" => Ok(OnConflict::Upsert(column_ref, upsert_options)), + "upsert" => Ok(OnConflict::Upsert(column_ref)), _ => UnexpectedTokenSnafu { token: parts[0].to_string(), } @@ -169,11 +145,9 @@ impl TryFrom<&str> for OnConflict { mod tests { use std::sync::Arc; - use arrow::datatypes::{DataType, Field, Schema}; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; - use crate::util::{ - column_reference::ColumnReference, constraints::UpsertOptions, on_conflict::OnConflict, - }; + use crate::util::{column_reference::ColumnReference, on_conflict::OnConflict}; #[test] fn test_on_conflict_from_str() { @@ -189,10 +163,7 @@ mod tests { let on_conflict = OnConflict::try_from("upsert:col2").expect("valid on conflict"); assert_eq!( on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col2".to_string()]), - UpsertOptions::default() - ) + OnConflict::Upsert(ColumnReference::new(vec!["col2".to_string()])) ); let err = OnConflict::try_from("do_nothing").expect_err("invalid on conflict"); @@ -217,174 +188,12 @@ mod tests { OnConflict::DoNothing(ColumnReference::new(vec!["col1".to_string()])) ); - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col2".to_string()]), - UpsertOptions::default(), - ) - .to_string(); - assert_eq!( - OnConflict::try_from(on_conflict.as_str()).expect("valid on conflict"), - OnConflict::Upsert( - ColumnReference::new(vec!["col2".to_string()]), - UpsertOptions::default() - ) - ); - } - - #[test] - fn test_upsert_parsing_with_default_options() { - let on_conflict = OnConflict::try_from("upsert:col1").expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::default() - ) - ); - - // Test explicit empty options string - let on_conflict = OnConflict::try_from("upsert:col1#").expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::default() - ) - ); - } - - #[test] - fn test_upsert_parsing_with_remove_duplicates() { let on_conflict = - OnConflict::try_from("upsert:col1#remove_duplicates").expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new().with_remove_duplicates(true) - ) - ); - } - - #[test] - fn test_upsert_parsing_with_last_write_wins() { - let on_conflict = - OnConflict::try_from("upsert:col1#last_write_wins").expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new().with_last_write_wins(true) - ) - ); - } - - #[test] - fn test_upsert_parsing_with_both_options() { - let on_conflict = OnConflict::try_from("upsert:col1#remove_duplicates,last_write_wins") - .expect("valid on conflict"); + OnConflict::Upsert(ColumnReference::new(vec!["col2".to_string()])).to_string(); assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new() - .with_remove_duplicates(true) - .with_last_write_wins(true) - ) - ); - - // Test reverse order - let on_conflict = OnConflict::try_from("upsert:col1#last_write_wins,remove_duplicates") - .expect("valid on 
conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new() - .with_remove_duplicates(true) - .with_last_write_wins(true) - ) - ); - } - - #[test] - fn test_upsert_parsing_with_spaces_in_options() { - let on_conflict = OnConflict::try_from("upsert:col1# remove_duplicates , last_write_wins ") - .expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new() - .with_remove_duplicates(true) - .with_last_write_wins(true) - ) - ); - } - - #[test] - fn test_upsert_parsing_with_invalid_options() { - let err = - OnConflict::try_from("upsert:col1#invalid_option").expect_err("invalid upsert option"); - assert!(err.to_string().contains("Invalid upsert options")); - - let err = OnConflict::try_from("upsert:col1#remove_duplicates,invalid_option") - .expect_err("invalid upsert option"); - assert!(err.to_string().contains("Invalid upsert options")); - } - - #[test] - fn test_upsert_parsing_with_composite_columns() { - let on_conflict = OnConflict::try_from("upsert:(col1,col2)#remove_duplicates") - .expect("valid on conflict"); - assert_eq!( - on_conflict, - OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string(), "col2".to_string()]), - UpsertOptions::new().with_remove_duplicates(true) - ) - ); - } - - #[test] - fn test_upsert_roundtrip_with_options() { - // Test default options - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::default(), - ); - let roundtrip = - OnConflict::try_from(on_conflict.to_string().as_str()).expect("valid roundtrip"); - assert_eq!(roundtrip, on_conflict); - - // Test with remove_duplicates - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new().with_remove_duplicates(true), - ); - let roundtrip = - OnConflict::try_from(on_conflict.to_string().as_str()).expect("valid roundtrip"); - assert_eq!(roundtrip, on_conflict); - - // Test with last_write_wins - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new().with_last_write_wins(true), - ); - let roundtrip = - OnConflict::try_from(on_conflict.to_string().as_str()).expect("valid roundtrip"); - assert_eq!(roundtrip, on_conflict); - - // Test with both options - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col1".to_string()]), - UpsertOptions::new() - .with_remove_duplicates(true) - .with_last_write_wins(true), + OnConflict::try_from(on_conflict.as_str()).expect("valid on conflict"), + OnConflict::Upsert(ColumnReference::new(vec!["col2".to_string()])) ); - let roundtrip = - OnConflict::try_from(on_conflict.to_string().as_str()).expect("valid roundtrip"); - assert_eq!(roundtrip, on_conflict); } #[test] @@ -405,26 +214,10 @@ mod tests { r#"ON CONFLICT ("col1") DO NOTHING"#.to_string() ); - let on_conflict = OnConflict::Upsert( - ColumnReference::new(vec!["col2".to_string()]), - UpsertOptions::default(), - ); + let on_conflict = OnConflict::Upsert(ColumnReference::new(vec!["col2".to_string()])); assert_eq!( on_conflict.build_on_conflict_statement(&schema), r#"ON CONFLICT ("col2") DO UPDATE SET "col1" = EXCLUDED."col1""#.to_string() ); - - // Test that upsert options don't affect SQL statement generation - // (the options are used during batch validation, not SQL generation) - let on_conflict_with_options = OnConflict::Upsert( - ColumnReference::new(vec!["col2".to_string()]), - 
UpsertOptions::new() - .with_remove_duplicates(true) - .with_last_write_wins(true), - ); - assert_eq!( - on_conflict_with_options.build_on_conflict_statement(&schema), - r#"ON CONFLICT ("col2") DO UPDATE SET "col1" = EXCLUDED."col1""#.to_string() - ); - } } diff --git a/core/src/util/table_arg_replace.rs b/core/src/util/table_arg_replace.rs new file mode 100644 index 00000000..8be67a0b --- /dev/null +++ b/core/src/util/table_arg_replace.rs @@ -0,0 +1,107 @@ +use std::ops::ControlFlow; + +use datafusion::sql::{ + sqlparser::ast::{ + FunctionArg, Ident, ObjectName, TableAlias, TableFactor, TableFunctionArgs, VisitorMut, + }, + TableReference, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct TableArgReplace { + pub tables: Vec<(TableReference, TableFunctionArgs)>, +} + +impl TableArgReplace { + /// Constructs a new `TableArgReplace` instance. + pub fn new(tables: Vec<(TableReference, Vec<FunctionArg>)>) -> Self { + Self { + tables: tables + .into_iter() + .map(|(table, args)| { + ( + table, + TableFunctionArgs { + args, + settings: None, + }, + ) + }) + .collect(), + } + } + + /// Adds a new table argument replacement. + pub fn with(mut self, table: TableReference, args: Vec<FunctionArg>) -> Self { + self.tables.push(( + table, + TableFunctionArgs { + args, + settings: None, + }, + )); + self + } + + #[cfg(feature = "federation")] + /// Converts the `TableArgReplace` instance into an `AstAnalyzerRule`. + pub fn into_analyzer(self) -> datafusion_federation::sql::ast_analyzer::AstAnalyzerRule { + let mut visitor = self; + let x = move |mut statement: datafusion::sql::sqlparser::ast::Statement| { + let _ = datafusion::sql::sqlparser::ast::VisitMut::visit(&mut statement, &mut visitor); + Ok(statement) + }; + Box::new(x) + } +} + +impl VisitorMut for TableArgReplace { + type Break = (); + fn pre_visit_table_factor( + &mut self, + table_factor: &mut TableFactor, + ) -> ControlFlow<Self::Break> { + if let TableFactor::Table { + name, args, alias, ..
+ } = table_factor + { + let name_as_tableref = name_to_table_reference(name); + if let Some((table, arg)) = self + .tables + .iter() + .find(|(t, _)| t.resolved_eq(&name_as_tableref)) + { + *args = Some(arg.clone()); + if alias.is_none() { + *alias = Some(TableAlias { + name: Ident::new(table.table()), + columns: vec![], + }) + } + } + } + ControlFlow::Continue(()) + } +} + +fn name_to_table_reference(name: &ObjectName) -> TableReference { + let first = name + .0 + .first() + .map(|n| n.as_ident().expect("expected Ident").value.to_string()); + let second = name + .0 + .get(1) + .map(|n| n.as_ident().expect("expected Ident").value.to_string()); + let third = name + .0 + .get(2) + .map(|n| n.as_ident().expect("expected Ident").value.to_string()); + + match (first, second, third) { + (Some(first), Some(second), Some(third)) => TableReference::full(first, second, third), + (Some(first), Some(second), None) => TableReference::partial(first, second), + (Some(first), None, None) => TableReference::bare(first), + _ => panic!("Invalid table name"), + } +} diff --git a/core/src/util/test.rs b/core/src/util/test.rs index 148c7d40..970d5363 100644 --- a/core/src/util/test.rs +++ b/core/src/util/test.rs @@ -1,6 +1,7 @@ use std::{any::Any, sync::Arc}; -use arrow::{array::RecordBatch, datatypes::SchemaRef}; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::{ common::Statistics, error::{DataFusionError, Result}, @@ -8,7 +9,6 @@ use datafusion::{ physical_expr::EquivalenceProperties, physical_plan::{ common, - execution_plan::{Boundedness, EmissionType}, stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, }, diff --git a/core/tests/arrow_record_batch_gen/mod.rs b/core/tests/arrow_record_batch_gen/mod.rs index c915703b..33d14bd1 100644 --- a/core/tests/arrow_record_batch_gen/mod.rs +++ b/core/tests/arrow_record_batch_gen/mod.rs @@ -1,12 +1,12 @@ -use arrow::array::RecordBatch; -use arrow::{ +use chrono::NaiveDate; +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::{ array::*, datatypes::{ i256, DataType, Date32Type, Date64Type, Field, Int8Type, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, Schema, SchemaRef, TimeUnit, }, }; -use chrono::NaiveDate; use std::sync::Arc; // Helper functions to create arrow record batches of different types @@ -247,6 +247,63 @@ pub(crate) fn get_arrow_timestamp_record_batch() -> (RecordBatch, SchemaRef) { (record_batch, schema) } +pub(crate) fn get_arrow_timestamp_record_batch_without_timezone() -> (RecordBatch, SchemaRef) { + // Timestamp Types + let timestamp_second_array = + TimestampSecondArray::from(vec![1_680_000_000, 1_680_040_000, 1_680_080_000]); + let timestamp_milli_array = TimestampMillisecondArray::from(vec![ + 1_680_000_000_000, + 1_680_040_000_000, + 1_680_080_000_000, + ]); + let timestamp_micro_array = TimestampMicrosecondArray::from(vec![ + 1_680_000_000_000_000, + 1_680_040_000_000_000, + 1_680_080_000_000_000, + ]); + let timestamp_nano_array = TimestampNanosecondArray::from(vec![ + 1_680_000_000_000_000_000, + 1_680_040_000_000_000_000, + 1_680_080_000_000_000_000, + ]); + + let schema = Arc::new(Schema::new(vec![ + Field::new( + "timestamp_second", + DataType::Timestamp(TimeUnit::Second, None), + false, + ), + Field::new( + "timestamp_milli", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new( + 
"timestamp_micro", + DataType::Timestamp(TimeUnit::Microsecond, None), + false, + ), + Field::new( + "timestamp_nano", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + + let record_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(timestamp_second_array), + Arc::new(timestamp_milli_array), + Arc::new(timestamp_micro_array), + Arc::new(timestamp_nano_array), + ], + ) + .expect("Failed to created arrow timestamp record batch"); + + (record_batch, schema) +} + // Date32, Date64 pub(crate) fn get_arrow_date_record_batch() -> (RecordBatch, SchemaRef) { let date32_array = Date32Array::from(vec![ @@ -359,6 +416,28 @@ pub(crate) fn get_arrow_decimal_record_batch() -> (RecordBatch, SchemaRef) { (record_batch, schema) } +pub(crate) fn get_mysql_arrow_decimal_record() -> (RecordBatch, SchemaRef) { + let decimal128_array = + Decimal128Array::from(vec![i128::from(123), i128::from(222), i128::from(321)]); + let decimal256_array = + Decimal256Array::from(vec![i256::from(-123), i256::from(222), i256::from(0)]) + .with_precision_and_scale(65, 10) + .expect("Fail to create Decimal256(65, 10) array"); + + let schema = Arc::new(Schema::new(vec![ + Field::new("decimal128", DataType::Decimal128(38, 10), false), + Field::new("decimal256", DataType::Decimal256(65, 10), false), // Maximum is 65. + ])); + + let record_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(decimal128_array), Arc::new(decimal256_array)], + ) + .expect("Failed to created arrow decimal record batch"); + + (record_batch, schema) +} + // Duration pub(crate) fn get_arrow_duration_record_batch() -> (RecordBatch, SchemaRef) { let duration_nano_array = DurationNanosecondArray::from(vec![1, 2, 3]); @@ -557,10 +636,10 @@ pub(crate) fn get_arrow_list_of_lists_record_batch() -> (RecordBatch, Arc (RecordBatch, SchemaRef) { (rb, schema) } -fn parse_json_to_batch(json_data: &str, schema: SchemaRef) -> RecordBatch { +pub(crate) fn parse_json_to_batch(json_data: &str, schema: SchemaRef) -> RecordBatch { let reader = arrow_json::ReaderBuilder::new(schema) .build(std::io::Cursor::new(json_data)) .expect("Failed to create JSON reader"); diff --git a/core/tests/clickhouse/common.rs b/core/tests/clickhouse/common.rs new file mode 100644 index 00000000..688db25a --- /dev/null +++ b/core/tests/clickhouse/common.rs @@ -0,0 +1,64 @@ +use bollard::secret::HealthConfig; +use datafusion_table_providers::{ + sql::db_connection_pool::clickhousepool::ClickHouseConnectionPool, util::secrets::to_secret_map, +}; +use secrecy::SecretString; +use std::collections::HashMap; +use tracing::instrument; + +use crate::docker::{ContainerRunnerBuilder, RunningContainer}; + +const CLICKHOUSE_USER: &str = "user"; +const CLICKHOUSE_PASSWORD: &str = "integration-test-pw"; +const CLICKHOUSE_DOCKER_CONTAINER: &str = "runtime-integration-test-clickhouse"; + +pub(super) fn get_clickhouse_params() -> HashMap { + to_secret_map(HashMap::from([ + ("url".to_string(), "http://localhost:8123".to_string()), + ("user".to_string(), CLICKHOUSE_USER.to_string()), + ("password".to_string(), CLICKHOUSE_PASSWORD.to_string()), + ])) +} + +#[instrument] +pub async fn start_clickhouse_docker_container() -> Result { + let container_name = CLICKHOUSE_DOCKER_CONTAINER; + + let clickhouse_docker_image = std::env::var("CLICKHOUSE_DOCKER_IMAGE") + .unwrap_or_else(|_| format!("{}clickhouse:latest", "registry.hub.docker.com/library/")); + + let running_container = ContainerRunnerBuilder::new(container_name) + .image(clickhouse_docker_image) + 
.add_port_binding(8123, 8123) + .add_env_var("CLICKHOUSE_USER", CLICKHOUSE_USER) + .add_env_var("CLICKHOUSE_PASSWORD", CLICKHOUSE_PASSWORD) + .healthcheck(HealthConfig { + test: Some(vec![ + "CMD-SHELL".to_string(), + format!( + "wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1" + ), + ]), + interval: Some(500_000_000), // 250ms + timeout: Some(100_000_000), // 100ms + retries: Some(5), + start_period: Some(500_000_000), // 100ms + start_interval: None, + }) + .build()? + .run() + .await?; + + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + Ok(running_container) +} + +#[instrument] +pub(super) async fn get_mysql_connection_pool( + port: usize, +) -> Result<ClickHouseConnectionPool, anyhow::Error> { + let pool = ClickHouseConnectionPool::new(get_clickhouse_params()) + .await + .expect("Failed to create MySQL Connection Pool"); + Ok(pool) +} diff --git a/core/tests/clickhouse/mod.rs b/core/tests/clickhouse/mod.rs new file mode 100644 index 00000000..3e785fe6 --- /dev/null +++ b/core/tests/clickhouse/mod.rs @@ -0,0 +1,124 @@ +use clickhouse::Client; +use common::{get_clickhouse_params, start_clickhouse_docker_container}; +use datafusion::{prelude::SessionContext, sql::TableReference}; +use datafusion_table_providers::{ + clickhouse::ClickHouseTableFactory, + sql::db_connection_pool::clickhousepool::ClickHouseConnectionPool, +}; + +mod common; + +use serde::{Deserialize, Serialize}; + +#[derive(clickhouse::Row, Serialize, Deserialize, Debug, PartialEq)] +struct Row { + id: i64, + name: String, + age: i32, + is_active: bool, + score: f64, + created_at: i64, + tags: Vec<String>, + tag_groups: Vec<Vec<String>>, + attributes: (f32, f32), +} + +fn create_sample_rows() -> Vec<Row> { + vec![ + Row { + id: 1, + name: "Alice".to_string(), + age: 30, + is_active: true, + score: 91.5, + created_at: 1689000000000, + tags: vec!["fast".to_string(), "smart".to_string()], + tag_groups: vec![ + vec!["group1".to_string(), "groupA".to_string()], + vec!["group2".to_string()], + ], + attributes: (5.5, 130.0), + }, + Row { + id: 2, + name: "Bob".to_string(), + age: 45, + is_active: false, + score: 85.2, + created_at: 1689000360000, + tags: vec!["strong".to_string()], + tag_groups: vec![vec!["group3".to_string()]], + attributes: (6.1, 180.0), + }, + ] +} + +async fn create_table(client: Client, table_name: &str) { + let sql: String = format!( + " + CREATE TABLE IF NOT EXISTS {table_name} ( + id Int64, + name String, + age Int32, + is_active Bool, + score Float64, + created_at DateTime64(3), + tags Array(String), + tag_groups Array(Array(String)), + attributes Tuple(Float32, Float32), + ) ENGINE = MergeTree() ORDER BY id; + " + ); + client.query(&sql).execute().await.unwrap(); +} + +async fn insert_rows( + client: &clickhouse::Client, + table: &str, + rows: Vec<Row>, +) -> clickhouse::error::Result<()> { + let mut insert = client.insert(table)?; + for row in rows { + insert.write(&row).await?; + } + insert.end().await?; + Ok(()) +} + +/// inserts data into clickhouse using official client and reads it back for now +#[tokio::test] +async fn clickhouse_insert_and_read() { + start_clickhouse_docker_container().await.unwrap(); + + let table_name = "test_table"; + let pool = ClickHouseConnectionPool::new(get_clickhouse_params()) + .await + .unwrap(); + + create_table(pool.client(), table_name).await; + insert_rows(&pool.client, table_name, create_sample_rows()) + .await + .unwrap(); + + let factory = ClickHouseTableFactory::new(pool); + let ctx = SessionContext::new(); + + let table_provider = factory + .table_provider(TableReference::bare(table_name),
None) + .await + .unwrap(); + + ctx.register_table(table_name, table_provider) + .expect("Table should be registered"); + + let sql = format!("SELECT * FROM {table_name}"); + let df = ctx + .sql(&sql) + .await + .expect("DataFrame should be created from query"); + + let record_batch = df.collect().await.expect("RecordBatch should be collected"); + + assert_eq!(record_batch[0].num_rows(), 2); + assert_eq!(record_batch[0].num_columns(), 9); +} diff --git a/core/tests/docker/mod.rs b/core/tests/docker/mod.rs index 058d79d9..80347cb6 100644 --- a/core/tests/docker/mod.rs +++ b/core/tests/docker/mod.rs @@ -1,8 +1,12 @@ use std::{borrow::Cow, collections::HashMap, sync::Arc}; use bollard::{ - container::{Config, CreateContainerOptions, RemoveContainerOptions, StartContainerOptions}, + container::{ + Config, CreateContainerOptions, InspectContainerOptions, RemoveContainerOptions, + StartContainerOptions, StopContainerOptions, + }, image::CreateImageOptions, + query_parameters::{ListContainersOptions, ListImagesOptions}, secret::{ ContainerState, ContainerStateStatusEnum, Health, HealthConfig, HealthStatusEnum, HostConfig, PortBinding, }, @@ -39,7 +43,9 @@ pub async fn remove(docker: &Docker, name: &str) -> Result<(), anyhow::Error> { } pub async fn stop(docker: &Docker, name: &str) -> Result<(), anyhow::Error> { - Ok(docker.stop_container(name, None).await?) + Ok(docker + .stop_container(name, Option::<StopContainerOptions>::None) + .await?) } pub struct ContainerRunnerBuilder<'a> { @@ -105,7 +111,7 @@ pub struct ContainerRunner<'a> { healthcheck: Option<HealthConfig>, } -impl<'a> ContainerRunner<'a> { +impl ContainerRunner<'_> { pub async fn run(self) -> Result<RunningContainer, anyhow::Error> { if self.is_container_running().await? { remove(&self.docker, &self.name).await?; @@ -124,7 +130,7 @@ impl<'a> ContainerRunner<'a> { format!("{container_port}/tcp"), Some(vec![PortBinding { host_ip: Some("127.0.0.1".to_string()), - host_port: Some(format!("{host_port}")), + host_port: Some(format!("{host_port}/tcp")), }]), ); } @@ -164,7 +170,10 @@ impl<'a> ContainerRunner<'a> { let start_time = std::time::Instant::now(); loop { - let inspect_container = self.docker.inspect_container(&self.name, None).await?; + let inspect_container = self + .docker + .inspect_container(&self.name, Option::<InspectContainerOptions>::None) + .await?; tracing::trace!("Container status: {:?}", inspect_container.state); if let Some(ContainerState { @@ -196,7 +205,10 @@ impl<'a> ContainerRunner<'a> { async fn pull_image(&self) -> Result<(), anyhow::Error> { // Check if image is already pulled - let images = self.docker.list_images::<&str>(None).await?; + let images = self + .docker + .list_images(Option::<ListImagesOptions>::None) + .await?; for image in images { if image.repo_tags.iter().any(|t| t == &self.image) { tracing::debug!("Docker image {} already pulled", self.image); @@ -218,7 +230,10 @@ impl<'a> ContainerRunner<'a> { } async fn is_container_running(&self) -> Result<bool, anyhow::Error> { - let containers = self.docker.list_containers::<&str>(None).await?; + let containers = self + .docker + .list_containers(Option::<ListContainersOptions>::None) + .await?; for container in containers { let Some(names) = container.names else { continue; diff --git a/core/tests/duckdb/mod.rs b/core/tests/duckdb/mod.rs index 47db1f9e..3d933dc3 100644 --- a/core/tests/duckdb/mod.rs +++ b/core/tests/duckdb/mod.rs @@ -1,10 +1,9 @@ use crate::arrow_record_batch_gen::*; -use arrow::array::RecordBatch; -use arrow::datatypes::SchemaRef; +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::SchemaRef; use datafusion::catalog::TableProviderFactory; use
datafusion::common::{Constraints, ToDFSchema}; use datafusion::datasource::memory::MemorySourceConfig; -use datafusion::datasource::source::DataSourceExec; use datafusion::execution::context::SessionContext; use datafusion::logical_expr::dml::InsertOp; use datafusion::logical_expr::CreateExternalTable; @@ -33,7 +32,7 @@ async fn arrow_duckdb_round_trip( order_exprs: vec![], unbounded: false, options: HashMap::new(), - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -44,12 +43,14 @@ async fn arrow_duckdb_round_trip( let ctx = SessionContext::new(); - let mem_exec = DataSourceExec::new(Arc::new( - MemorySourceConfig::try_new(&[vec![arrow_record.clone()]], arrow_record.schema(), None) - .expect("memory source config created"), - )); + let mem_exec = MemorySourceConfig::try_new_exec( + &[vec![arrow_record.clone()]], + arrow_record.schema(), + None, + ) + .expect("memory exec created"); let insert_plan = table_provider - .insert_into(&ctx.state(), Arc::new(mem_exec), InsertOp::Append) + .insert_into(&ctx.state(), mem_exec, InsertOp::Append) .await .expect("insert plan created"); diff --git a/core/tests/flight/mod.rs b/core/tests/flight/mod.rs index 34e4ac17..924bbd47 100644 --- a/core/tests/flight/mod.rs +++ b/core/tests/flight/mod.rs @@ -4,7 +4,6 @@ use std::pin::Pin; use std::sync::Arc; use std::time::Duration; -use arrow_array::{Array, Float32Array, Int64Array, Int8Array, RecordBatch}; use arrow_flight::encode::FlightDataEncoderBuilder; use arrow_flight::flight_service_server::{FlightService, FlightServiceServer}; use arrow_flight::sql::server::FlightSqlService; @@ -12,8 +11,9 @@ use arrow_flight::sql::{CommandStatementQuery, ProstMessageExt, SqlInfo, TicketS use arrow_flight::{ FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse, Ticket, }; -use arrow_schema::{DataType, Field, Schema}; use async_trait::async_trait; +use datafusion::arrow::array::{Array, Float32Array, Int64Array, Int8Array, RecordBatch}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::prelude::SessionContext; use futures::{stream, Stream, TryStreamExt}; use prost::Message; @@ -28,7 +28,7 @@ use tonic::transport::Server; use tonic::{Extensions, Request, Response, Status, Streaming}; use datafusion_table_providers::flight::sql::FlightSqlDriver; -use datafusion_table_providers::flight::FlightTableFactory; +use datafusion_table_providers::flight::{FlightProperties, FlightTableFactory}; const AUTH_HEADER: &str = "authorization"; const BEARER_TOKEN: &str = "Bearer flight-sql-token"; @@ -161,8 +161,7 @@ async fn test_flight_sql_data_source() -> datafusion::common::Result<()> { Arc::new(Float32Array::from(vec![0.0, 0.1, 0.2, 0.3])), Arc::new(Int8Array::from(vec![10, 20, 30, 40])), ], - ) - .unwrap(); + )?; let rows_per_partition = partition_data.num_rows(); let query = "SELECT * FROM some_table"; @@ -174,9 +173,7 @@ async fn test_flight_sql_data_source() -> datafusion::common::Result<()> { endpoint_archetype, ]; let num_partitions = endpoints.len(); - let flight_info = FlightInfo::default() - .try_with_schema(partition_data.schema().as_ref()) - .unwrap(); + let flight_info = FlightInfo::default().try_with_schema(partition_data.schema().as_ref())?; let flight_info = endpoints .into_iter() .fold(flight_info, |fi, e| fi.with_endpoint(e)); @@ -194,11 +191,11 @@ async fn test_flight_sql_data_source() -> datafusion::common::Result<()> { }; let port = 
service.run_in_background(rx).await.port(); let ctx = SessionContext::new(); + let props_template = FlightProperties::new().with_reusable_flight_info(true); + let driver = FlightSqlDriver::new().with_properties_template(props_template); ctx.state_ref().write().table_factories_mut().insert( "FLIGHT_SQL".into(), - Arc::new(FlightTableFactory::new( - Arc::new(FlightSqlDriver::default()), - )), + Arc::new(FlightTableFactory::new(Arc::new(driver))), ); let _ = ctx .sql(&format!( diff --git a/core/tests/integration.rs b/core/tests/integration.rs index 5f635855..f576ca64 100644 --- a/core/tests/integration.rs +++ b/core/tests/integration.rs @@ -1,8 +1,10 @@ use rand::Rng; mod arrow_record_batch_gen; +#[cfg(feature = "clickhouse")] +mod clickhouse; mod docker; -#[cfg(feature = "duckdb")] +#[cfg(all(feature = "duckdb", feature = "federation"))] mod duckdb; #[cfg(feature = "flight")] mod flight; @@ -21,5 +23,5 @@ fn container_registry() -> String { } fn get_random_port() -> usize { - rand::thread_rng().gen_range(15432..65535) + rand::rng().random_range(15432..65535) } diff --git a/core/tests/mysql/common.rs b/core/tests/mysql/common.rs index 43577619..65c42662 100644 --- a/core/tests/mysql/common.rs +++ b/core/tests/mysql/common.rs @@ -12,7 +12,7 @@ use crate::{ const MYSQL_ROOT_PASSWORD: &str = "integration-test-pw"; const MYSQL_DOCKER_CONTAINER: &str = "runtime-integration-test-mysql"; -fn get_mysql_params(port: usize, time_zone: Option<&str>) -> HashMap { +pub(super) fn get_mysql_params(port: usize) -> HashMap { let mut params = HashMap::new(); params.insert( "mysql_host".to_string(), @@ -46,9 +46,6 @@ fn get_mysql_params(port: usize, time_zone: Option<&str>) -> HashMap Result, ) -> Result { - let mysql_pool = MySQLConnectionPool::new(get_mysql_params(port, time_zone)) + let mysql_pool = MySQLConnectionPool::new(get_mysql_params(port)) .await .expect("Failed to create MySQL Connection Pool"); diff --git a/core/tests/mysql/mod.rs b/core/tests/mysql/mod.rs index 10cdd21e..a2e24ec5 100644 --- a/core/tests/mysql/mod.rs +++ b/core/tests/mysql/mod.rs @@ -1,9 +1,9 @@ -use datafusion::execution::context::SessionContext; +use datafusion::{datasource::memory::MemorySourceConfig, execution::context::SessionContext}; use datafusion_table_providers::sql::{ db_connection_pool::DbConnectionPool, sql_provider_datafusion::SqlTable, }; use mysql_async::prelude::ToValue; -use rstest::rstest; +use rstest::{fixture, rstest}; use std::sync::Arc; use arrow::{ @@ -13,7 +13,19 @@ use arrow::{ use datafusion_table_providers::sql::db_connection_pool::dbconnection::AsyncDbConnection; +use crate::arrow_record_batch_gen::*; use crate::docker::RunningContainer; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::catalog::TableProviderFactory; +use datafusion::common::{Constraints, ToDFSchema}; +use datafusion::logical_expr::dml::InsertOp; +use datafusion::logical_expr::CreateExternalTable; +use datafusion::physical_plan::collect; +#[cfg(feature = "mysql-federation")] +use datafusion_federation::schema_cast::record_convert::try_cast_to; +use datafusion_table_providers::mysql::MySQLTableProviderFactory; +use secrecy::ExposeSecret; +use tokio::sync::Mutex; mod common; @@ -733,7 +745,7 @@ async fn arrow_mysql_one_way( + Sync + 'static, > = Arc::new(pool); - let table = SqlTable::new("mysql", &sqltable_pool, table_name, None) + let table = SqlTable::new("mysql", &sqltable_pool, table_name) .await .expect("Table should be created"); @@ -758,6 +770,96 @@ async fn arrow_mysql_one_way( record_batch } 
+#[allow(unused_variables)] +async fn arrow_mysql_round_trip( + port: usize, + arrow_record: RecordBatch, + source_schema: SchemaRef, + table_name: &str, +) { + let factory = MySQLTableProviderFactory::new(); + let ctx = SessionContext::new(); + let cmd = CreateExternalTable { + schema: Arc::new(arrow_record.schema().to_dfschema().expect("to df schema")), + name: table_name.into(), + location: "".to_string(), + file_type: "".to_string(), + table_partition_cols: vec![], + if_not_exists: false, + temporary: false, + definition: None, + order_exprs: vec![], + unbounded: false, + options: common::get_mysql_params(port) + .into_iter() + .map(|(k, v)| (k, v.expose_secret().to_string())) + .collect(), + constraints: Constraints::default(), + column_defaults: Default::default(), + }; + let table_provider = factory + .create(&ctx.state(), &cmd) + .await + .expect("table provider created"); + + let ctx = SessionContext::new(); + let mem_exec = MemorySourceConfig::try_new_exec( + &[vec![arrow_record.clone()]], + arrow_record.schema(), + None, + ) + .expect("memory exec created"); + let insert_plan = table_provider + .insert_into(&ctx.state(), mem_exec, InsertOp::Overwrite) + .await + .expect("insert plan created"); + + let _ = collect(insert_plan, ctx.task_ctx()) + .await + .expect("insert done"); + ctx.register_table(table_name, table_provider) + .expect("Table should be registered"); + let sql = format!("SELECT * FROM {table_name}"); + let df = ctx + .sql(&sql) + .await + .expect("DataFrame should be created from query"); + + let record_batch = df.collect().await.expect("RecordBatch should be collected"); + tracing::debug!("Original Arrow Record Batch: {:?}", arrow_record.columns()); + tracing::debug!( + "MySQL returned Record Batch: {:?}", + record_batch[0].columns() + ); + + #[cfg(feature = "mysql-federation")] + let casted_result = + try_cast_to(record_batch[0].clone(), source_schema).expect("Failed to cast record batch"); + + // Check results + assert_eq!(record_batch.len(), 1); + assert_eq!(record_batch[0].num_rows(), arrow_record.num_rows()); + assert_eq!(record_batch[0].num_columns(), arrow_record.num_columns()); + + #[cfg(feature = "mysql-federation")] + assert_eq!(arrow_record, casted_result); +} + +#[derive(Debug)] +struct ContainerManager { + port: usize, + claimed: bool, +} + +#[fixture] +#[once] +fn container_manager() -> Mutex { + Mutex::new(ContainerManager { + port: crate::get_random_port(), + claimed: false, + }) +} + async fn start_mysql_container(port: usize) -> RunningContainer { let running_container = common::start_mysql_docker_container(port) .await @@ -768,6 +870,46 @@ async fn start_mysql_container(port: usize) -> RunningContainer { running_container } +#[rstest] +#[case::binary(get_arrow_binary_record_batch(), "binary")] +#[case::int(get_arrow_int_record_batch(), "int")] +#[case::float(get_arrow_float_record_batch(), "float")] +#[case::utf8(get_arrow_utf8_record_batch(), "utf8")] +#[case::time(get_arrow_time_record_batch(), "time")] +#[case::timestamp(get_arrow_timestamp_record_batch_without_timezone(), "timestamp")] +#[case::date(get_arrow_date_record_batch(), "date")] +#[case::struct_type(get_arrow_struct_record_batch(), "struct")] +// MySQL only supports up to 65 precision for decimal through REAL type. +#[case::decimal(get_mysql_arrow_decimal_record(), "decimal")] +#[ignore] // TODO: interval types are broken in MySQL - Interval is not available in MySQL. 
+#[case::interval(get_arrow_interval_record_batch(), "interval")] +#[case::duration(get_arrow_duration_record_batch(), "duration")] +#[ignore] // TODO: array types are broken in MySQL - array is not available in MySQL. +#[case::list(get_arrow_list_record_batch(), "list")] +#[case::null(get_arrow_null_record_batch(), "null")] +#[ignore] +#[case::bytea_array(get_arrow_bytea_array_record_batch(), "bytea_array")] +#[test_log::test(tokio::test)] +async fn test_arrow_mysql_roundtrip( + container_manager: &Mutex, + #[case] arrow_result: (RecordBatch, SchemaRef), + #[case] table_name: &str, +) { + let mut container_manager = container_manager.lock().await; + if !container_manager.claimed { + container_manager.claimed = true; + start_mysql_container(container_manager.port).await; + } + + arrow_mysql_round_trip( + container_manager.port, + arrow_result.0, + arrow_result.1, + table_name, + ) + .await; +} + #[rstest] #[test_log::test(tokio::test)] async fn test_mysql_arrow_oneway() { diff --git a/core/tests/postgres/mod.rs b/core/tests/postgres/mod.rs index d30f6a9f..489db211 100644 --- a/core/tests/postgres/mod.rs +++ b/core/tests/postgres/mod.rs @@ -1,31 +1,32 @@ use crate::{arrow_record_batch_gen::*, docker::RunningContainer}; use arrow::{ - array::{Array, Decimal128Array, RecordBatch, StringArray}, + array::{Decimal128Array, RecordBatch}, datatypes::{DataType, Field, Schema, SchemaRef}, }; +use datafusion::execution::context::SessionContext; use datafusion::logical_expr::CreateExternalTable; use datafusion::physical_plan::collect; use datafusion::{catalog::TableProviderFactory, logical_expr::dml::InsertOp}; use datafusion::{ common::{Constraints, ToDFSchema}, - datasource::source::DataSourceExec, + datasource::memory::MemorySourceConfig, }; -use datafusion::{datasource::memory::MemorySourceConfig, execution::context::SessionContext}; +#[cfg(feature = "postgres-federation")] use datafusion_federation::schema_cast::record_convert::try_cast_to; + use datafusion_table_providers::{ postgres::{DynPostgresConnectionPool, PostgresTableProviderFactory}, sql::sql_provider_datafusion::SqlTable, UnsupportedTypeAction, }; use rstest::{fixture, rstest}; -use serde_json::{from_str, Value}; +use serde_json::Value; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::{Mutex, MutexGuard}; mod common; mod schema; -mod schema_redshift; async fn arrow_postgres_round_trip( port: usize, @@ -46,7 +47,7 @@ async fn arrow_postgres_round_trip( order_exprs: vec![], unbounded: false, options: common::get_pg_params(port), - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -56,12 +57,14 @@ async fn arrow_postgres_round_trip( .expect("table provider created"); let ctx = SessionContext::new(); - let mem_exec = DataSourceExec::new(Arc::new( - MemorySourceConfig::try_new(&[vec![arrow_record.clone()]], arrow_record.schema(), None) - .expect("memory source config created"), - )); + let mem_exec = MemorySourceConfig::try_new_exec( + &[vec![arrow_record.clone()]], + arrow_record.schema(), + None, + ) + .expect("memory exec created"); let insert_plan = table_provider - .insert_into(&ctx.state(), Arc::new(mem_exec), InsertOp::Append) + .insert_into(&ctx.state(), mem_exec, InsertOp::Append) .await .expect("insert plan created"); @@ -84,6 +87,7 @@ async fn arrow_postgres_round_trip( record_batch[0].columns() ); + #[cfg(feature = "postgres-federation")] let casted_result = try_cast_to(record_batch[0].clone(), 
source_schema).expect("Failed to cast record batch"); @@ -91,6 +95,7 @@ async fn arrow_postgres_round_trip( assert_eq!(record_batch.len(), 1); assert_eq!(record_batch[0].num_rows(), arrow_record.num_rows()); assert_eq!(record_batch[0].num_columns(), arrow_record.num_columns()); + #[cfg(feature = "postgres-federation")] assert_eq!(arrow_record, casted_result); } @@ -128,10 +133,9 @@ fn container_manager() -> Mutex { } async fn start_container(manager: &mut MutexGuard<'_, ContainerManager>) { - let running_container = - common::start_postgres_docker_container("postgres:latest", manager.port, None) - .await - .expect("Postgres container to start"); + let running_container = common::start_postgres_docker_container(manager.port) + .await + .expect("Postgres container to start"); manager.running_container = Some(running_container); @@ -209,7 +213,6 @@ async fn test_postgres_enum_type(port: usize) { extra_stmt, expected_record, UnsupportedTypeAction::default(), - true, ) .await; } @@ -267,7 +270,6 @@ async fn test_postgres_numeric_type(port: usize) { extra_stmt, expected_record, UnsupportedTypeAction::default(), - true, ) .await; } @@ -279,7 +281,7 @@ async fn test_postgres_jsonb_type(port: usize) { );"; let insert_table_stmt = r#" - INSERT INTO jsonb_values (data) VALUES + INSERT INTO jsonb_values (data) VALUES ('{"name": "John", "age": 30}'), ('{"name": "Jane", "age": 25}'), ('[1, 2, 3]'), @@ -289,49 +291,39 @@ async fn test_postgres_jsonb_type(port: usize) { let schema = Arc::new(Schema::new(vec![Field::new("data", DataType::Utf8, true)])); + // Parse and re-serialize the JSON to ensure consistent ordering let expected_values = vec![ - r#"{"name": "John", "age": 30}"#, - r#"{"name": "Jane", "age": 25}"#, - "[1, 2, 3]", - "null", - r#"{"nested": {"key": "value"}}"#, + serde_json::from_str::(r#"{"name":"John","age":30}"#) + .unwrap() + .to_string(), + serde_json::from_str::(r#"{"name":"Jane","age":25}"#) + .unwrap() + .to_string(), + serde_json::from_str::("[1,2,3]") + .unwrap() + .to_string(), + serde_json::from_str::("null").unwrap().to_string(), + serde_json::from_str::(r#"{"nested":{"key":"value"}}"#) + .unwrap() + .to_string(), ]; - let expected_json: Vec = expected_values - .iter() - .map(|s| from_str(s).unwrap()) - .collect(); - let expected_record = RecordBatch::try_new( Arc::clone(&schema), vec![Arc::new(arrow::array::StringArray::from(expected_values))], ) .expect("Failed to create arrow record batch"); - let actual_record_batch = arrow_postgres_one_way( + arrow_postgres_one_way( port, "jsonb_values", create_table_stmt, insert_table_stmt, None, - expected_record.clone(), + expected_record, UnsupportedTypeAction::String, - false, ) .await; - - let actual_data_column = actual_record_batch[0] - .column_by_name("data") - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - - let actual_json: Vec = (0..actual_data_column.len()) - .map(|i| from_str(actual_data_column.value(i)).unwrap()) - .collect(); - - assert_eq!(actual_json, expected_json); } async fn arrow_postgres_one_way( @@ -342,8 +334,7 @@ async fn arrow_postgres_one_way( extra_stmt: Option<&str>, expected_record: RecordBatch, unsupported_type_action: UnsupportedTypeAction, - perform_check: bool, -) -> Vec { +) { tracing::debug!("Running tests on {table_name}"); let ctx = SessionContext::new(); @@ -379,7 +370,7 @@ async fn arrow_postgres_one_way( // Register datafusion table, test row -> arrow conversion let sqltable_pool: Arc = Arc::new(pool); - let table = SqlTable::new("postgres", &sqltable_pool, table_name, None) + let table 
= SqlTable::new("postgres", &sqltable_pool, table_name) .await .expect("Table should be created"); ctx.register_table(table_name, Arc::new(table)) @@ -392,9 +383,5 @@ async fn arrow_postgres_one_way( let record_batch = df.collect().await.expect("RecordBatch should be collected"); - if perform_check { - assert_eq!(record_batch[0], expected_record); - } - - record_batch + assert_eq!(record_batch[0], expected_record); } diff --git a/core/tests/postgres/schema.rs b/core/tests/postgres/schema.rs index 39b522b8..907cb8c9 100644 --- a/core/tests/postgres/schema.rs +++ b/core/tests/postgres/schema.rs @@ -14,7 +14,7 @@ use datafusion_table_providers::postgres::PostgresTableFactory; use datafusion_table_providers::sql::db_connection_pool::postgrespool::PostgresConnectionPool; use datafusion_table_providers::util::secrets::to_secret_map; -const COMPLEX_TABLE_SQL: &str = include_str!("scripts/complex_table_pg.sql"); +const COMPLEX_TABLE_SQL: &str = include_str!("scripts/complex_table.sql"); fn get_schema() -> SchemaRef { let fields = vec![ @@ -43,7 +43,7 @@ fn get_schema() -> SchemaRef { #[tokio::test] async fn test_postgres_schema_inference() { let port = crate::get_random_port(); - let container = common::start_postgres_docker_container("postgres:latest", port, None) + let container = common::start_postgres_docker_container(port) .await .expect("Postgres container to start"); @@ -63,7 +63,7 @@ async fn test_postgres_schema_inference() { order_exprs: vec![], unbounded: false, options: common::get_pg_params(port), - constraints: Constraints::new_unverified(vec![]), + constraints: Constraints::default(), column_defaults: HashMap::new(), temporary: false, }; @@ -95,7 +95,7 @@ async fn test_postgres_schema_inference() { #[tokio::test] async fn test_postgres_schema_inference_complex_types() { let port = crate::get_random_port(); - let container = common::start_postgres_docker_container("postgres:latest", port, None) + let container = common::start_postgres_docker_container(port) .await .expect("Postgres container to start"); @@ -138,7 +138,7 @@ async fn test_postgres_schema_inference_complex_types() { #[tokio::test] async fn test_postgres_view_schema_inference() { let port = crate::get_random_port(); - let container = common::start_postgres_docker_container("postgres:latest", port, None) + let container = common::start_postgres_docker_container(port) .await .expect("Postgres container to start"); @@ -160,7 +160,7 @@ async fn test_postgres_view_schema_inference() { .conn .execute(cmd, &[]) .await - .expect("executing SQL from complex_table_pg.sql"); + .expect("executing SQL from complex_table.sql"); } let table_factory = PostgresTableFactory::new(postgres_pool.clone()); @@ -182,7 +182,7 @@ async fn test_postgres_view_schema_inference() { #[tokio::test] async fn test_postgres_materialized_view_schema_inference() { let port = crate::get_random_port(); - let container = common::start_postgres_docker_container("postgres:latest", port, None) + let container = common::start_postgres_docker_container(port) .await .expect("Postgres container to start"); @@ -204,7 +204,7 @@ async fn test_postgres_materialized_view_schema_inference() { .conn .execute(cmd, &[]) .await - .expect("executing SQL from complex_table_pg.sql"); + .expect("executing SQL from complex_table.sql"); } let table_factory = PostgresTableFactory::new(postgres_pool); diff --git a/core/tests/postgres/scripts/complex_table_pg.sql b/core/tests/postgres/scripts/complex_table_pg.sql index b4b2e50d..6bfdb081 100644 --- 
a/core/tests/postgres/scripts/complex_table_pg.sql +++ b/core/tests/postgres/scripts/complex_table_pg.sql @@ -49,6 +49,9 @@ CREATE TABLE example_table ( int_array_col INTEGER[], text_array_col TEXT[], + -- Range types + int_range_col INT4RANGE, + -- Custom composite type composite_col complex_type ); diff --git a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_materialized_view_schema_inference.snap b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_materialized_view_schema_inference.snap index e99f533b..d152bc46 100644 --- a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_materialized_view_schema_inference.snap +++ b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_materialized_view_schema_inference.snap @@ -1,5 +1,5 @@ --- -source: tests/postgres/schema.rs +source: core/tests/postgres/schema.rs expression: pretty_schema --- Schema { @@ -227,6 +227,33 @@ Schema { dict_is_ordered: false, metadata: {}, }, + Field { + name: "int_range_col", + data_type: Struct( + [ + Field { + name: "lower", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + Field { + name: "upper", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + ], + ), + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, Field { name: "composite_col", data_type: Struct( diff --git a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap index 3644be2b..9d4a5cec 100644 --- a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap +++ b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap @@ -227,6 +227,33 @@ Schema { dict_is_ordered: false, metadata: {}, }, + Field { + name: "int_range_col", + data_type: Struct( + [ + Field { + name: "lower", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + Field { + name: "upper", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + ], + ), + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, Field { name: "composite_col", data_type: Struct( diff --git a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_view_schema_inference.snap b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_view_schema_inference.snap index e99f533b..d152bc46 100644 --- a/core/tests/postgres/snapshots/integration__postgres__schema__postgres_view_schema_inference.snap +++ b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_view_schema_inference.snap @@ -1,5 +1,5 @@ --- -source: tests/postgres/schema.rs +source: core/tests/postgres/schema.rs expression: pretty_schema --- Schema { @@ -227,6 +227,33 @@ Schema { dict_is_ordered: false, metadata: {}, }, + Field { + name: "int_range_col", + data_type: Struct( + [ + Field { + name: "lower", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + Field { + name: "upper", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + ], + ), + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, Field { name: "composite_col", data_type: 
Struct( diff --git a/core/tests/sqlite/mod.rs b/core/tests/sqlite/mod.rs index ffde24b3..d9f8b928 100644 --- a/core/tests/sqlite/mod.rs +++ b/core/tests/sqlite/mod.rs @@ -12,6 +12,7 @@ use datafusion::execution::context::SessionContext; use datafusion::logical_expr::{dml::InsertOp, CreateExternalTable}; use datafusion::physical_plan::collect; use datafusion::sql::TableReference; +#[cfg(feature = "sqlite-federation")] use datafusion_federation::schema_cast::record_convert::try_cast_to; use datafusion_table_providers::sql::arrow_sql_gen::statement::{ CreateTableBuilder, InsertBuilder, @@ -83,12 +84,10 @@ async fn arrow_sqlite_round_trip( } let table = match projected_schema { - None => SqlTable::new("sqlite", &sqltable_pool, table_name, None) + None => SqlTable::new("sqlite", &sqltable_pool, table_name) .await .expect("Table should be created"), - Some(schema) => { - SqlTable::new_with_schema("sqlite", &sqltable_pool, schema, table_name, None) - } + Some(schema) => SqlTable::new_with_schema("sqlite", &sqltable_pool, schema, table_name), }; ctx.register_table(table_name, Arc::new(table)) @@ -102,6 +101,7 @@ async fn arrow_sqlite_round_trip( let record_batch = df.collect().await.expect("RecordBatch should be collected"); + #[cfg(feature = "sqlite-federation")] let casted_record = try_cast_to(record_batch[0].clone(), Arc::clone(&source_schema)).unwrap(); @@ -115,6 +115,7 @@ async fn arrow_sqlite_round_trip( assert_eq!(record_batch.len(), 1); assert_eq!(record_batch[0].num_rows(), arrow_record.num_rows()); assert_eq!(record_batch[0].num_columns(), arrow_record.num_columns()); + #[cfg(feature = "sqlite-federation")] assert_eq!(casted_record, arrow_record); } } @@ -128,6 +129,7 @@ async fn arrow_sqlite_round_trip( #[case::timestamp(get_arrow_timestamp_record_batch(), "timestamp")] #[case::date(get_arrow_date_record_batch(), "date")] #[case::struct_type(get_arrow_struct_record_batch(), "struct")] +#[ignore] // Requires a custom sqlite extension for decimal types #[case::decimal(get_arrow_decimal_record_batch(), "decimal")] #[ignore] // TODO: interval types are broken in SQLite - Interval is not available in Sqlite. 
#[case::interval(get_arrow_interval_record_batch(), "interval")] @@ -209,9 +211,7 @@ fn create_comprehensive_test_data() -> (RecordBatch, SchemaRef) { None, Some(500000u64), ]); - #[allow(clippy::approx_constant)] let col_float32 = Float32Array::from(vec![Some(1.5), None, Some(-3.14), Some(2.71)]); - #[allow(clippy::approx_constant)] let col_float64 = Float64Array::from(vec![None, Some(2.718281828), Some(-1.414), Some(3.14159)]); let col_utf8 = StringArray::from(vec![Some("hello"), Some("world"), None, Some("test")]); diff --git a/python/.cargo/config.toml b/python/.cargo/config.toml new file mode 100644 index 00000000..91a099a6 --- /dev/null +++ b/python/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 00000000..9ea5de3e --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,26 @@ +/venv +.idea +.DS_Store +.vscode + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Python dist ignore +dist + +# C extensions +*.so + +# Python dist +dist + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version +venv +.venv + diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 00000000..c50cd491 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "datafusion-table-providers-python" +version = { workspace = true } +readme = { workspace = true } +edition = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +description = { workspace = true } +publish = false + +[lib] +name = "datafusion_table_providers" +crate-type = ["cdylib"] +doc = false + +[dependencies] +arrow = { workspace = true } +arrow-flight = { workspace = true, optional = true } +datafusion = { workspace = true, features = ["pyarrow"] } +datafusion-ffi = { workspace = true } +datafusion-table-providers = { workspace = true } +pyo3 = { version = "0.24.2" } +tokio = { version = "1.46", features = [ + "macros", + "rt", + "rt-multi-thread", + "sync", +] } +duckdb = { workspace = true, optional = true } + +[features] +default = [ + "duckdb", + "clickhouse", + "sqlite", + "mysql", + "postgres", + "odbc", + "flight", +] +clickhouse = ["datafusion-table-providers/clickhouse-federation"] +duckdb = ["dep:duckdb", "datafusion-table-providers/duckdb-federation"] +sqlite = ["datafusion-table-providers/sqlite-federation"] +mysql = ["datafusion-table-providers/mysql-federation"] +postgres = ["datafusion-table-providers/postgres-federation"] +odbc = ["datafusion-table-providers/odbc-federation"] +flight = ["dep:arrow-flight", "datafusion-table-providers/flight"] diff --git a/python/examples/clickhouse_demo.py b/python/examples/clickhouse_demo.py new file mode 100644 index 00000000..31533b45 --- /dev/null +++ b/python/examples/clickhouse_demo.py @@ -0,0 +1,17 @@ +from datafusion import SessionContext +from datafusion_table_providers import clickhouse + +ctx = SessionContext() +connection_param = { + "url": "http://localhost:8123", + "database": "default", + "user": "user", + "password": "secret" +} +pool = clickhouse.ClickHouseTableFactory(connection_param) +tables = pool.tables() + +for t in tables: + ctx.register_table_provider(t, 
pool.get_table(t)) + print("Checking table:", t) + ctx.table(t).show() diff --git a/python/examples/duckdb_demo.py b/python/examples/duckdb_demo.py new file mode 100644 index 00000000..49546427 --- /dev/null +++ b/python/examples/duckdb_demo.py @@ -0,0 +1,11 @@ +from datafusion import SessionContext +from datafusion_table_providers import duckdb + +ctx = SessionContext() +pool = duckdb.DuckDBTableFactory("../../core/examples/duckdb_example.db", duckdb.AccessMode.ReadOnly) +tables = pool.tables() + +for t in tables: + ctx.register_table_provider(t, pool.get_table(t)) + print("Checking table:", t) + ctx.table(t).show() diff --git a/python/examples/flight_demo.py b/python/examples/flight_demo.py new file mode 100644 index 00000000..73e09785 --- /dev/null +++ b/python/examples/flight_demo.py @@ -0,0 +1,14 @@ +from datafusion import SessionContext +from datafusion_table_providers import flight + +ctx = SessionContext() +pool = flight.FlightTableFactory() +table_provider = pool.get_table("http://localhost:32010", {"flight.sql.query": "SELECT * FROM taxi"}) +table_name = "taxi_flight_table" +ctx.register_table_provider(table_name, table_provider) +ctx.sql(f""" + SELECT "VendorID", COUNT(*), SUM(passenger_count), SUM(total_amount) + FROM {table_name} + GROUP BY "VendorID" + ORDER BY COUNT(*) DESC + """).show() diff --git a/python/examples/mysql_demo.py b/python/examples/mysql_demo.py new file mode 100644 index 00000000..f708d78c --- /dev/null +++ b/python/examples/mysql_demo.py @@ -0,0 +1,14 @@ +from datafusion import SessionContext +from datafusion_table_providers import mysql + +ctx = SessionContext() +connection_param = { + "connection_string": "mysql://root:password@localhost:3306/mysql_db", + "sslmode": "disabled"} +pool = mysql.MySQLTableFactory(connection_param) +tables = pool.tables() + +for t in tables: + ctx.register_table_provider(t, pool.get_table(t)) + print("Checking table:", t) + ctx.table(t).show() diff --git a/python/examples/odbc_sqlite_demo.py b/python/examples/odbc_sqlite_demo.py new file mode 100644 index 00000000..d633a50b --- /dev/null +++ b/python/examples/odbc_sqlite_demo.py @@ -0,0 +1,9 @@ +from datafusion import SessionContext +from datafusion_table_providers import odbc + +ctx = SessionContext() +connection_param: dict = {'connection_string': 'driver=SQLite3;database=../../core/examples/sqlite_example.db;'} +pool = odbc.ODBCTableFactory(connection_param) + +ctx.register_table_provider(name = "companies", provider = pool.get_table("companies")) +ctx.table("companies").show() diff --git a/python/examples/postgres_demo.py b/python/examples/postgres_demo.py new file mode 100644 index 00000000..c59f33af --- /dev/null +++ b/python/examples/postgres_demo.py @@ -0,0 +1,18 @@ +from datafusion import SessionContext +from datafusion_table_providers import postgres + +ctx = SessionContext() +connection_param = { + "host": "localhost", + "user": "postgres", + "db": "postgres_db", + "pass": "password", + "port": "5432", + "sslmode": "disable"} +pool = postgres.PostgresTableFactory(connection_param) +tables = pool.tables() + +for t in tables: + ctx.register_table_provider(t, pool.get_table(t)) + print("Checking table:", t) + ctx.table(t).show() diff --git a/python/examples/sqlite_demo.py b/python/examples/sqlite_demo.py new file mode 100644 index 00000000..fd1b376b --- /dev/null +++ b/python/examples/sqlite_demo.py @@ -0,0 +1,11 @@ +from datafusion import SessionContext +from datafusion_table_providers import sqlite + +ctx = SessionContext() +pool = 
sqlite.SqliteTableFactory("../../core/examples/sqlite_example.db", "file", 3.0, None) +tables = pool.tables() + +for t in tables: + ctx.register_table_provider(t, pool.get_table(t)) + print("Checking table:", t) + ctx.table(t).show() diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 00000000..268daab8 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,71 @@ +[build-system] +requires = ["maturin>=1.5.1,<1.6.0"] +build-backend = "maturin" + +[project] +name = "datafusion_table_providers" +version = "0.1.0" +description = "Build and run queries against data" +readme = "../README.md" +license = { file = "../LICENSE" } +requires-python = ">=3.9" +keywords = ["datafusion", "dataframe", "rust", "query-engine"] +classifier = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python", + "Programming Language :: Rust", +] +dependencies = ["datafusion>=45.0.0"] + +[project.urls] +repository = "https://github.com/datafusion-contrib/datafusion-table-providers" + +[tool.isort] +profile = "black" + +[tool.maturin] +python-source = "python" +module-name = "datafusion_table_providers._internal" +include = [{ path = "../Cargo.lock", format = "sdist" }] +exclude = [".github/**", "ci/**", ".asf.yaml"] +# Require Cargo.lock is up to date +locked = true + +# Enable docstring linting using the google style guide +[tool.ruff.lint] +select = ["E4", "E7", "E9", "F", "D", "W"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.pycodestyle] +max-doc-length = 88 + +# Disable docstring checking for these directories +[tool.ruff.lint.per-file-ignores] +"python/tests/*" = ["D"] +"examples/*" = ["D", "W505"] +"dev/*" = ["D"] +"benchmarks/*" = ["D", "F"] +"docs/*" = ["D"] + +[dependency-groups] +dev = [ + "maturin>=1.8.1", + "numpy>1.25.0", + "pytest>=7.4.4", + "pytest-asyncio>=0.23.3", + "ruff>=0.9.1", + "toml>=0.10.2", + "pygithub==2.5.0", + "pyarrow>=19.0.1" +] diff --git a/python/python/datafusion_table_providers/__init__.py b/python/python/datafusion_table_providers/__init__.py new file mode 100644 index 00000000..02cc74ad --- /dev/null +++ b/python/python/datafusion_table_providers/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
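# Package layout note (a sketch inferred from the maturin settings in pyproject.toml
# above, not part of the original change): the compiled Rust extension is built as
# `datafusion_table_providers._internal`, and each pure-Python module in this package
# (duckdb, sqlite, postgres, mysql, odbc, flight, clickhouse) wraps one of its
# Raw*TableFactory classes.
#
# Minimal usage sketch, assuming a local DuckDB file at the hypothetical path
# "example.db" (mirrors the factory/registration API shown in the wrappers below):
#
#     from datafusion import SessionContext
#     from datafusion_table_providers import duckdb
#
#     ctx = SessionContext()
#     factory = duckdb.DuckDBTableFactory("example.db", duckdb.AccessMode.ReadOnly)
#     for name in factory.tables():
#         ctx.register_table_provider(name, factory.get_table(name))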
+"""Python package for datafusion table provider.""" diff --git a/python/python/datafusion_table_providers/clickhouse.py b/python/python/datafusion_table_providers/clickhouse.py new file mode 100644 index 00000000..5a8a2f97 --- /dev/null +++ b/python/python/datafusion_table_providers/clickhouse.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for Clickhouse table provider.""" + +from typing import Any, List +from . import _internal + + +class ClickHouseTableFactory: + """ClickHouse table factory.""" + + def __init__(self, params: dict) -> None: + """Create a ClickHouse table factory.""" + self._raw = _internal.clickhouse.RawClickHouseTableFactory(params) + + def tables(self) -> List[str]: + """Get all the table names.""" + return self._raw.tables() + + def get_table(self, table_reference: str, args=None) -> Any: + """Return the table provider for table named `table_reference`. + + Args: + table_reference (str): table name + args: optional list of parameter tuples (name, value) + """ + return self._raw.get_table(table_reference, args) diff --git a/python/python/datafusion_table_providers/duckdb.py b/python/python/datafusion_table_providers/duckdb.py new file mode 100644 index 00000000..de867b33 --- /dev/null +++ b/python/python/datafusion_table_providers/duckdb.py @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for DuckDB table provider.""" + +from typing import Any, List +from . import _internal +from enum import Enum + +class AccessMode(Enum): + """Python equivalent of rust duckdb::AccessMode Enum.""" + Automatic = "AUTOMATIC" + ReadOnly = "READ_ONLY" + ReadWrite = "READ_WRITE" + +class DuckDBTableFactory: + """DuckDB table factory.""" + + def __init__(self, path: str, access_mode: AccessMode = AccessMode.Automatic) -> None: + """Create a DuckDB table factory. + + If creating an in-memory table factory, then specify path to be :memory: or none + and don't specify access_mode. 
If creating a file-based table factory, then + specify path and access_mode. + + Args: + path: Memory or file location + access_mode: Access mode configuration + """ + # TODO: think about the interface, restrict invalid combination of input + # arguments, for example, if path is memory, then access_mode should not be + # specified. + if path == ":memory:" or path == "": + self._raw = _internal.duckdb.RawDuckDBTableFactory.new_memory() + else: + self._raw = _internal.duckdb.RawDuckDBTableFactory.new_file(path, access_mode.value) + + def tables(self) -> List[str]: + """Get all the table names.""" + return self._raw.tables() + + def get_table(self, table_reference: str) -> Any: + """Return table provider for the table named `table_reference`. + + Args: + table_reference (str): table name + """ + return self._raw.get_table(table_reference) diff --git a/python/python/datafusion_table_providers/flight.py b/python/python/datafusion_table_providers/flight.py new file mode 100644 index 00000000..cc20c157 --- /dev/null +++ b/python/python/datafusion_table_providers/flight.py @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for sqlite table provider.""" + +from typing import Any, List +from . import _internal + +class FlightTableFactory: + """Flight table factory.""" + + def __init__(self) -> None: + """Create a Flight table factory.""" + self._raw = _internal.flight.RawFlightTableFactory() + + def get_table(self, entry_point: str, options: dict) -> Any: + """Return the table provider for table. + + Args: + entry_point: uri + options: table information + """ + return self._raw.get_table(entry_point, options) diff --git a/python/python/datafusion_table_providers/mysql.py b/python/python/datafusion_table_providers/mysql.py new file mode 100644 index 00000000..7aa0afe2 --- /dev/null +++ b/python/python/datafusion_table_providers/mysql.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for MySQL table provider.""" + +from typing import Any, List +from . 
import _internal + +class MySQLTableFactory: + """MySQL table factory.""" + + def __init__(self, params: dict) -> None: + """Create a MySQL table factory.""" + self._raw = _internal.mysql.RawMySQLTableFactory(params) + + def tables(self) -> List[str]: + """Get all the table names.""" + return self._raw.tables() + + def get_table(self, table_reference: str) -> Any: + """Return the table provider for table named `table_reference`. + + Args: + table_reference (str): table name + """ + return self._raw.get_table(table_reference) diff --git a/python/python/datafusion_table_providers/odbc.py b/python/python/datafusion_table_providers/odbc.py new file mode 100644 index 00000000..86907d61 --- /dev/null +++ b/python/python/datafusion_table_providers/odbc.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for ODBC table provider.""" + +from typing import Any, List +from . import _internal + +class ODBCTableFactory: + """ODBC table factory.""" + + def __init__(self, params: dict) -> None: + """Create an odbc table factory.""" + self._raw = _internal.odbc.RawODBCTableFactory(params) + + def get_table(self, table_reference: str) -> Any: + """Return the table provider for table named `table_reference`. + + Args: + table_reference (str): table name + """ + return self._raw.get_table(table_reference) diff --git a/python/python/datafusion_table_providers/postgres.py b/python/python/datafusion_table_providers/postgres.py new file mode 100644 index 00000000..cb11e851 --- /dev/null +++ b/python/python/datafusion_table_providers/postgres.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for Postgres table provider.""" + +from typing import Any, List +from . 
import _internal + +class PostgresTableFactory: + """Postgres table factory.""" + + def __init__(self, params: dict) -> None: + """Create a Postgres table factory.""" + self._raw = _internal.postgres.RawPostgresTableFactory(params) + + def tables(self) -> List[str]: + """Get all the table names.""" + return self._raw.tables() + + def get_table(self, table_reference: str) -> Any: + """Return the table provider for table named `table_reference`. + + Args: + table_reference (str): table name + """ + return self._raw.get_table(table_reference) diff --git a/python/python/datafusion_table_providers/sqlite.py b/python/python/datafusion_table_providers/sqlite.py new file mode 100644 index 00000000..845ec823 --- /dev/null +++ b/python/python/datafusion_table_providers/sqlite.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Python interface for sqlite table provider.""" + +from typing import Any, List, Optional +from . import _internal + +class SqliteTableFactory: + """Sqlite table factory.""" + + def __init__(self, path: str, mode: str, busy_timeout_s: float, attach_databases: Optional[List[str]] = None) -> None: + """Create a sqlite table factory.""" + self._raw = _internal.sqlite.RawSqliteTableFactory(path, mode, busy_timeout_s, attach_databases) + + def tables(self) -> List[str]: + """Get all the table names.""" + return self._raw.tables() + + def get_table(self, table_reference: str) -> Any: + """Return the table provider for table named `table_reference`. 
+ + Args: + table_reference (str): table name + """ + return self._raw.get_table(table_reference) diff --git a/python/python/tests/test_clickhouse.py b/python/python/tests/test_clickhouse.py new file mode 100644 index 00000000..8b2984e7 --- /dev/null +++ b/python/python/tests/test_clickhouse.py @@ -0,0 +1,89 @@ +import subprocess +import time +from datafusion import SessionContext +from datafusion_table_providers import clickhouse # hypothetical provider + +def run_docker_container(): + subprocess.run( + ["docker", "run", "--name", "clickhouse", + "-e", "CLICKHOUSE_USER=user", + "-e", "CLICKHOUSE_PASSWORD=secret", + "-p", "8123:8123", + "-d", "clickhouse/clickhouse-server:latest"], + check=True, + ) + time.sleep(20) + +def create_schema(): + sql = r""" + CREATE TABLE companies ( + id UInt32, + name String, + founded Date, + revenue Decimal(18,2), + is_active Bool, + tags Array(String) + ) ENGINE = MergeTree() + ORDER BY id; + + INSERT INTO companies VALUES + (1, 'Acme Corporation', '1999-03-12', 12500000.50, 1, ['manufacturing', 'global']), + (2, 'Widget Inc.', '2005-07-01', 4500000.00, 1, ['gadgets', 'innovation']), + (3, 'Gizmo Corp.', '2010-11-23', 780000.75, 0, ['hardware']), + (4, 'Tech Solutions', '2018-01-15', 220000.00, 1, ['consulting','it']), + (5, 'Data Innovations', '2021-05-10', 98000.99, 1, ['analytics','startup']); + + CREATE VIEW companies_param_view AS + SELECT id, name, founded, revenue, is_active, tags + FROM companies + WHERE name = {name:String}; + """ + subprocess.run( + ["docker", "exec", "-i", "clickhouse", "clickhouse-client", "--multiquery", "--query", sql], + check=True + ) + +def stop_container(): + subprocess.run(["docker", "stop", "clickhouse"], check=True) + subprocess.run(["docker", "rm", "clickhouse"], check=True) + +class TestClickHouseParameterized: + @classmethod + def setup_class(cls): + run_docker_container() + create_schema() + cls.ctx = SessionContext() + connection_param = { + "url": "http://localhost:8123", + "database": "default", + "user": "user", + "password": "secret" + } + cls.pool = clickhouse.ClickHouseTableFactory(connection_param) + + @classmethod + def teardown_class(cls): + stop_container() + + def test_get_tables(self): + tables = self.pool.tables() + assert "companies" in tables + assert "companies_param_view" in tables + + def test_parameterized_view(self): + # Register provider so DF can access the view\ + + self.ctx.register_table_provider( + "companies_param_view", + self.pool.get_table("companies_param_view", [("name", "Gizmo Corp.")]) + ) + + # Query using parameter + df = self.ctx.sql( + "SELECT id, name, revenue FROM companies_param_view" + ) + rows = df.collect()[0] + + assert len(rows["name"]) == 1 + assert rows["name"][0].as_py() == "Gizmo Corp." 
+ assert rows["revenue"][0].as_py() == 780000.75 diff --git a/python/python/tests/test_duckdb.py b/python/python/tests/test_duckdb.py new file mode 100644 index 00000000..4f28d3eb --- /dev/null +++ b/python/python/tests/test_duckdb.py @@ -0,0 +1,68 @@ +import pytest +import os +from datafusion import SessionContext +from datafusion_table_providers import duckdb + +class TestDuckDBIntegration: + def setup_method(self): + """Set up the test environment""" + self.ctx = SessionContext() + self.db_path = os.path.join(os.path.dirname(__file__), "..", "..", "..", "core", "examples", "duckdb_example.db") + self.pool_readonly = duckdb.DuckDBTableFactory(self.db_path, duckdb.AccessMode.ReadOnly) + self.pool_readwrite = duckdb.DuckDBTableFactory(self.db_path) + + def test_get_tables(self): + """Test retrieving tables from the database""" + tables = self.pool_readonly.tables() + assert isinstance(tables, list) + assert len(tables) == 2 + assert tables == ["companies", "projects"] + + def test_query_companies(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool_readonly.get_table("companies")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql("SELECT name FROM companies WHERE ticker = 'MSFT'") + result = df.collect() + + # Verify single row returned with name = Microsoft + assert len(result) == 1 + assert str(result[0]['name'][0]) == "Microsoft" + + def test_complex_query(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool_readonly.get_table("companies")) + self.ctx.register_table_provider("projects", self.pool_readonly.get_table("projects")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql( + """SELECT companies.id, companies.name as company_name, projects.name as project_name + FROM companies, projects + WHERE companies.id = projects.id""" + ) + result = df.collect() + + assert len(result) == 1 + assert str(result[0]['company_name'][0]) == "Microsoft" + assert str(result[0]['project_name'][0]) == "DataFusion" + + def test_write_fails(self): + """Test that writing fails on read-only mode""" + table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool_readonly.get_table("companies")) + + with pytest.raises(Exception): + tmp = self.ctx.sql("INSERT INTO companies VALUES (3, 'Test Corp', 'TEST')") + tmp.collect() # this will trigger the execution of the query + + def test_write_fails_readwrite(self): + """Test that writing fails because it is not supported""" + # Insertion fails because duckdb does not implement write operations even when + # database is opened in read-write mode. 
+ table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool_readwrite.get_table("companies")) + + with pytest.raises(Exception): + tmp = self.ctx.sql("INSERT INTO companies VALUES (3, 'Test Corp', 'TEST')") + tmp.collect() diff --git a/python/python/tests/test_flight.py b/python/python/tests/test_flight.py new file mode 100644 index 00000000..6487da41 --- /dev/null +++ b/python/python/tests/test_flight.py @@ -0,0 +1,54 @@ +from datafusion import SessionContext +from datafusion_table_providers import flight +import pytest +import subprocess +import time + + +class TestFlightIntegration: + @classmethod + def setup_class(self): + """Called once before all test methods in the class""" + self.ctx = SessionContext() + self.pool = flight.FlightTableFactory() + self.process = subprocess.Popen( + ["roapi", "-t", "taxi=https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + # 20s timeout is required to ensure the server is running and data is loaded + # The timeout is determined by empirical testing + time.sleep(20) + + @classmethod + def teardown_class(self): + """Called once after all test methods in the class""" + self.process.kill() + + def test_query_companies(self): + """Test querying companies table with SQL""" + print("Running test_query_companies") + table_name = "taxi_flight_table" + self.ctx.register_table_provider(table_name, self.pool.get_table("http://localhost:32010", { + "flight.sql.query": "SELECT * FROM taxi" + })) + df = self.ctx.sql(f""" + SELECT "VendorID", COUNT(*) as count, SUM(passenger_count) as passenger_counts, SUM(total_amount) as total_amounts + FROM {table_name} + GROUP BY "VendorID" + ORDER BY COUNT(*) DESC + """) + result = df.collect() + + # Verify the results + vendor_ids = result[0]['VendorID'].tolist() + assert vendor_ids == [2, 1, 6] + + counts = result[0]['count'].tolist() + assert counts == [2234632, 729732, 260] + + passenger_counts = result[0]['passenger_counts'].tolist() + assert passenger_counts == [2971865, 810883, None] + + total_amounts = result[0]['total_amounts'].tolist() + assert total_amounts == pytest.approx([60602721.27, 18841261.98, 12401.03]) diff --git a/python/python/tests/test_mysql.py b/python/python/tests/test_mysql.py new file mode 100644 index 00000000..386157e1 --- /dev/null +++ b/python/python/tests/test_mysql.py @@ -0,0 +1,82 @@ +import subprocess +import time + +from datafusion import SessionContext +from datafusion_table_providers import mysql + +def run_docker_container(): + """Run the Docker container with the MySQL image""" + result = subprocess.run( + ["docker", "run", "--name", "mysql", "-e", "MYSQL_ROOT_PASSWORD=password", "-e", "MYSQL_DATABASE=mysql_db", + "-p", "3306:3306", "-d", "mysql:9.0"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + if result.returncode != 0: + print(f"Failed to start MySQL container: {result.stderr.decode()}") + +def create_table_and_insert_data(): + """Create a table and insert data into MySQL""" + sql_commands = """ + CREATE TABLE companies ( + id INT PRIMARY KEY, + name VARCHAR(100) + ); + + INSERT INTO companies (id, name) VALUES (1, 'Acme Corporation'); + """ + + # Execute the SQL commands inside the Docker container + result = subprocess.run( + ["docker", "exec", "-i", "mysql", "mysql", "-uroot", "-ppassword", "mysql_db"], + input=sql_commands.encode(), # Pass SQL commands to stdin + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + + # Check if the SQL execution was 
successful + if result.returncode != 0: + print(f"Error executing SQL commands: {result.stderr.decode()}") + else: + print(f"SQL commands executed successfully:\n{result.stdout.decode()}") + +def stop_and_remove_container(): + """Stop and remove the MySQL container after use""" + subprocess.run(["docker", "stop", "mysql"]) + subprocess.run(["docker", "rm", "mysql"]) + print("MySQL container stopped and removed.") + + +class TestMySQLIntegration: + @classmethod + def setup_class(self): + run_docker_container() + time.sleep(30) + create_table_and_insert_data() + time.sleep(10) + self.ctx = SessionContext() + connection_param = { + "connection_string": "mysql://root:password@localhost:3306/mysql_db", + "sslmode": "disabled"} + self.pool = mysql.MySQLTableFactory(connection_param) + + @classmethod + def teardown_class(self): + stop_and_remove_container() + + def test_get_tables(self): + """Test retrieving tables from the database""" + tables = self.pool.tables() + assert isinstance(tables, list) + assert len(tables) == 1 + assert tables == ["companies"] + + def test_query_companies(self): + """Test querying companies table with SQL""" + table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool.get_table("companies")) + query = "SELECT * FROM companies" + df = self.ctx.sql(query).collect() + assert df is not None + name_column = df[0]['name'] + assert str(name_column[0]) == "Acme Corporation" diff --git a/python/python/tests/test_odbc.py b/python/python/tests/test_odbc.py new file mode 100644 index 00000000..78c1ae6f --- /dev/null +++ b/python/python/tests/test_odbc.py @@ -0,0 +1,49 @@ +import pytest +import os +from datafusion import SessionContext +from datafusion_table_providers import odbc + +class TestOdbcIntegration: + def setup_method(self): + """Set up the test environment""" + self.ctx = SessionContext() + connection_param: dict = {'connection_string': 'driver=SQLite3;database=../../../core/examples/sqlite_example.db;'} + self.pool = odbc.ODBCTableFactory(connection_param) + + def test_query_companies(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool.get_table("companies")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql("SELECT name FROM companies WHERE ticker = 'MSFT'") + result = df.collect() + + # Verify single row returned with name = Microsoft + assert len(result) == 1 + assert str(result[0]['name'][0]) == "Microsoft" + + def test_complex_query(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool.get_table("companies")) + self.ctx.register_table_provider("projects", self.pool.get_table("projects")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql( + """SELECT companies.id, companies.name as company_name, projects.name as project_name + FROM companies, projects + WHERE companies.id = projects.id""" + ) + result = df.collect() + + assert len(result) == 1 + assert str(result[0]['company_name'][0]) == "Microsoft" + assert str(result[0]['project_name'][0]) == "DataFusion" + + def test_write_fails(self): + """Test that writing fails because it is not supported""" + table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool.get_table("companies")) + + with pytest.raises(Exception): + tmp = self.ctx.sql("INSERT INTO companies VALUES (3, 'Test Corp', 'TEST')") + tmp.collect() # this will trigger the execution of the query diff --git a/python/python/tests/test_postgres.py 
b/python/python/tests/test_postgres.py new file mode 100644 index 00000000..844ffac0 --- /dev/null +++ b/python/python/tests/test_postgres.py @@ -0,0 +1,108 @@ +import subprocess +import time + +from datafusion import SessionContext +from datafusion_table_providers import postgres + +def run_docker_container(): + """Run the Docker container with the postgres image""" + result = subprocess.run( + ["docker", "run", "--name", "postgres", "-e", "POSTGRES_PASSWORD=password", "-e", "POSTGRES_DB=postgres_db", + "-p", "5432:5432", "-d", "postgres:16-alpine"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + if result.returncode != 0: + raise RuntimeError(f"Failed to start postgres container: {result.stderr.decode()}") + +def create_table_and_insert_data(): + """Create a table and insert data into postgres""" + sql_commands = """ + CREATE TABLE companies ( + id INT PRIMARY KEY, + name VARCHAR(100) + ); + + INSERT INTO companies (id, name) VALUES + (1, 'Acme Corporation'), + (2, 'Widget Inc.'), + (3, 'Gizmo Corp.'), + (4, 'Tech Solutions'), + (5, 'Data Innovations'); + + CREATE VIEW companies_view AS + SELECT id, name FROM companies; + + CREATE MATERIALIZED VIEW companies_materialized_view AS + SELECT id, name FROM companies; + """ + + result = subprocess.run( + ["docker", "exec", "-i", "postgres", "psql", "-U", "postgres", "-d", "postgres_db"], + input=sql_commands.encode(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + + if result.returncode != 0: + raise RuntimeError(f"Error executing SQL commands: {result.stderr.decode()}") + +def stop_and_remove_container() -> None: + """Stop and remove the postgres container after use.""" + subprocess.run(["docker", "stop", "postgres"], check=True) + subprocess.run(["docker", "rm", "postgres"], check=True) + + +class TestPostgresIntegration: + @classmethod + def setup_class(self): + run_docker_container() + time.sleep(30) + create_table_and_insert_data() + time.sleep(10) + self.ctx = SessionContext() + connection_param = { + "host": "localhost", + "user": "postgres", + "db": "postgres_db", + "pass": "password", + "port": "5432", + "sslmode": "disable"} + self.pool = postgres.PostgresTableFactory(connection_param) + + @classmethod + def teardown_class(self): + stop_and_remove_container() + + def test_get_tables(self): + """Test retrieving tables from the database""" + tables = self.pool.tables() + assert isinstance(tables, list) + assert len(tables) == 1 + assert tables == ["companies"] + + def test_query_companies(self): + """Test querying companies table with SQL.""" + table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool.get_table("companies")) + + # Test basic query + query = "SELECT * FROM companies ORDER BY id" + result = self.ctx.sql(query).collect() + assert result is not None + + # Test data integrity + record_batch = result[0] + name_column = record_batch["name"] + + expected_companies = [ + "Acme Corporation", + "Widget Inc.", + "Gizmo Corp.", + "Tech Solutions", + "Data Innovations" + ] + + assert len(name_column) == len(expected_companies) + for i, expected in enumerate(expected_companies): + assert str(name_column[i]) == expected diff --git a/python/python/tests/test_sqlite.py b/python/python/tests/test_sqlite.py new file mode 100644 index 00000000..f6ea05dc --- /dev/null +++ b/python/python/tests/test_sqlite.py @@ -0,0 +1,56 @@ +import pytest +import os +from datafusion import SessionContext +from datafusion_table_providers import sqlite + +class TestSqliteIntegration: + def setup_method(self): + 
"""Set up the test environment""" + self.ctx = SessionContext() + self.db_path = os.path.join(os.path.dirname(__file__), "..", "..", "..", "core", "examples", "sqlite_example.db") + self.pool = sqlite.SqliteTableFactory(self.db_path, "file", 3.0, None) + + def test_get_tables(self): + """Test retrieving tables from the database""" + tables = self.pool.tables() + assert isinstance(tables, list) + assert len(tables) == 2 + assert tables == ["companies", "projects"] + + def test_query_companies(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool.get_table("companies")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql("SELECT name FROM companies WHERE ticker = 'MSFT'") + result = df.collect() + + # Verify single row returned with name = Microsoft + assert len(result) == 1 + assert str(result[0]['name'][0]) == "Microsoft" + + def test_complex_query(self): + """Test querying companies table with SQL""" + self.ctx.register_table_provider("companies", self.pool.get_table("companies")) + self.ctx.register_table_provider("projects", self.pool.get_table("projects")) + + # Run SQL query to select Microsoft row + df = self.ctx.sql( + """SELECT companies.id, companies.name as company_name, projects.name as project_name + FROM companies, projects + WHERE companies.id = projects.id""" + ) + result = df.collect() + + assert len(result) == 1 + assert str(result[0]['company_name'][0]) == "Microsoft" + assert str(result[0]['project_name'][0]) == "DataFusion" + + def test_write_fails(self): + """Test that writing fails because it is not supported""" + table_name = "companies" + self.ctx.register_table_provider(table_name, self.pool.get_table("companies")) + + with pytest.raises(Exception): + tmp = self.ctx.sql("INSERT INTO companies VALUES (3, 'Test Corp', 'TEST')") + tmp.collect() # this will trigger the execution of the query diff --git a/python/src/clickhouse.rs b/python/src/clickhouse.rs new file mode 100644 index 00000000..79088cfe --- /dev/null +++ b/python/src/clickhouse.rs @@ -0,0 +1,113 @@ +use std::sync::Arc; + +use datafusion_table_providers::{ + clickhouse::{Arg, ClickHouseTableFactory}, + sql::db_connection_pool::{clickhousepool::ClickHouseConnectionPool, DbConnectionPool}, + util::secrets::to_secret_map, +}; +use pyo3::{ + exceptions::PyTypeError, + prelude::*, + types::{PyDict, PyList}, +}; + +use crate::{ + utils::{pydict_to_hashmap, to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.clickhouse")] +struct RawClickHouseTableFactory { + pool: Arc, + factory: ClickHouseTableFactory, +} + +#[pymethods] +impl RawClickHouseTableFactory { + #[new] + #[pyo3(signature = (params))] + pub fn new(py: Python, params: &Bound<'_, PyDict>) -> PyResult { + let params = to_secret_map(pydict_to_hashmap(params)?); + let pool = + Arc::new(wait_for_future(py, ClickHouseConnectionPool::new(params)).map_err(to_pyerr)?); + + Ok(Self { + factory: ClickHouseTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + pub fn tables(&self, py: Python) -> PyResult> { + wait_for_future(py, async { + let conn = self.pool.connect().await.map_err(to_pyerr)?; + let conn_async = conn.as_async().ok_or(to_pyerr( + "Unable to create connection to Postgres db".to_string(), + ))?; + let schemas = conn_async.schemas().await.map_err(to_pyerr)?; + + let mut tables = Vec::default(); + for schema in schemas { + let schema_tables = conn_async.tables(&schema).await.map_err(to_pyerr)?; + 
tables.extend(schema_tables); + } + + Ok(tables) + }) + } + + #[pyo3(signature = (table_reference, args=None))] + pub fn get_table( + &self, + py: Python, + table_reference: &str, + args: Option>, + ) -> PyResult { + let args_vec = if let Some(args) = args { + let seq = args.downcast_bound::(py).map_err(|_| { + PyTypeError::new_err("Argument must be list of int (signed/unsigned) or string") + })?; + + let arr: Result, _> = seq + .iter() + .map(|val| { + val.extract::<(String, u64)>() + .map(|x| (x.0, Arg::Unsigned(x.1))) + .or_else(|_| { + val.extract::<(String, i64)>() + .map(|x| (x.0, Arg::Signed(x.1))) + }) + .or_else(|_| { + val.extract::<(String, String)>() + .map(|x| (x.0, Arg::String(x.1))) + }) + }) + .collect(); + + let arr = arr.map_err(|_| { + PyTypeError::new_err("Argument must be list of int (signed/unsigned) or string") + })?; + + Some(arr) + } else { + None + }; + + let table = wait_for_future( + py, + self.factory + .table_provider(table_reference.into(), args_vec), + ) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/duckdb.rs b/python/src/duckdb.rs new file mode 100644 index 00000000..41efbaf3 --- /dev/null +++ b/python/src/duckdb.rs @@ -0,0 +1,80 @@ +use std::{str::FromStr, sync::Arc}; + +use datafusion_table_providers::{ + duckdb::DuckDBTableFactory, + sql::db_connection_pool::{duckdbpool::DuckDbConnectionPool, DbConnectionPool}, +}; +use duckdb::AccessMode; +use pyo3::prelude::*; + +use crate::{ + utils::{to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.duckdb")] +struct RawDuckDBTableFactory { + pool: Arc, + factory: DuckDBTableFactory, +} + +#[pymethods] +impl RawDuckDBTableFactory { + #[staticmethod] + #[pyo3(signature = ())] + pub fn new_memory() -> PyResult { + let pool = Arc::new(DuckDbConnectionPool::new_memory().map_err(to_pyerr)?); + + Ok(Self { + factory: DuckDBTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + #[staticmethod] + #[pyo3(signature = (path, access_mode))] + pub fn new_file(path: &str, access_mode: &str) -> PyResult { + let access_mode = AccessMode::from_str(access_mode).map_err(to_pyerr)?; + let pool = Arc::new(DuckDbConnectionPool::new_file(path, &access_mode).map_err(to_pyerr)?); + + Ok(Self { + factory: DuckDBTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + pub fn tables(&self, py: Python) -> PyResult> { + wait_for_future(py, async { + let conn = self.pool.connect().await.map_err(to_pyerr)?; + + let conn_sync = conn + .as_sync() + .ok_or(to_pyerr("Unable to create synchronous DuckDB connection"))?; + let schemas = conn_sync.schemas().map_err(to_pyerr)?; + + let mut tables = Vec::default(); + for schema in schemas { + let schema_tables = conn_sync.tables(&schema).map_err(to_pyerr)?; + tables.extend(schema_tables); + } + + Ok(tables) + }) + } + + pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult { + let table = wait_for_future(py, self.factory.table_provider(table_reference.into())) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/flight.rs b/python/src/flight.rs new file mode 100644 index 00000000..7254746a --- /dev/null +++ b/python/src/flight.rs @@ -0,0 +1,51 @@ +use 
std::sync::Arc; + +use datafusion::catalog::TableProvider; +use datafusion_table_providers::flight::{sql::FlightSqlDriver, FlightDriver, FlightTableFactory}; +use pyo3::{prelude::*, types::PyDict}; + +use crate::{ + utils::{pydict_to_hashmap, to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.Flight")] +struct RawFlightTableFactory { + factory: FlightTableFactory, +} + +#[pymethods] +impl RawFlightTableFactory { + #[new] + #[pyo3(signature = ())] + pub fn new() -> PyResult { + let driver: Arc = Arc::new(FlightSqlDriver::new()); + + Ok(Self { + factory: FlightTableFactory::new(Arc::clone(&driver)), + }) + } + + pub fn get_table( + &self, + py: Python, + entry_point: &str, + options: &Bound<'_, PyDict>, + ) -> PyResult { + let options = pydict_to_hashmap(options)?; + let table: Arc = Arc::new( + wait_for_future(py, self.factory.open_table(entry_point, options)).map_err(to_pyerr)?, + ); + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 00000000..ecf2581c --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,117 @@ +use std::{ + ffi::CString, + sync::{Arc, OnceLock}, +}; + +use datafusion::catalog::TableProvider; +use datafusion_ffi::table_provider::FFI_TableProvider; +use pyo3::{prelude::*, types::PyCapsule}; + +#[pyclass(module = "datafusion_table_providers._internal")] +struct RawTableProvider { + pub(crate) table: Arc, + pub(crate) supports_pushdown_filters: bool, +} + +#[inline] +pub(crate) fn get_tokio_runtime() -> &'static tokio::runtime::Runtime { + static RUNTIME: OnceLock = OnceLock::new(); + RUNTIME.get_or_init(|| tokio::runtime::Runtime::new().expect("Failed to create Tokio runtime")) +} + +#[pymethods] +impl RawTableProvider { + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let runtime = if cfg!(feature = "clickhouse") { + Some(get_tokio_runtime().handle().clone()) + } else { + None + }; + + let provider = FFI_TableProvider::new( + Arc::clone(&self.table), + self.supports_pushdown_filters, + runtime, + ); + + PyCapsule::new(py, provider, Some(name.clone())) + } +} + +#[cfg(feature = "clickhouse")] +pub mod clickhouse; +#[cfg(feature = "duckdb")] +pub mod duckdb; +#[cfg(feature = "flight")] +pub mod flight; +#[cfg(feature = "mysql")] +pub mod mysql; +#[cfg(feature = "odbc")] +pub mod odbc; +#[cfg(feature = "postgres")] +pub mod postgres; +#[cfg(feature = "sqlite")] +pub mod sqlite; +pub mod utils; + +#[pymodule] +// module name need to match project name +fn _internal(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + #[cfg(feature = "sqlite")] + { + let sqlite = PyModule::new(py, "sqlite")?; + sqlite::init_module(&sqlite)?; + m.add_submodule(&sqlite)?; + } + + #[cfg(feature = "duckdb")] + { + let duckdb = PyModule::new(py, "duckdb")?; + duckdb::init_module(&duckdb)?; + m.add_submodule(&duckdb)?; + } + + #[cfg(feature = "odbc")] + { + let odbc = PyModule::new(py, "odbc")?; + odbc::init_module(&odbc)?; + m.add_submodule(&odbc)?; + } + + #[cfg(feature = "mysql")] + { + let mysql = PyModule::new(py, "mysql")?; + mysql::init_module(&mysql)?; + m.add_submodule(&mysql)?; + } + + #[cfg(feature = "postgres")] + { + let postgres = PyModule::new(py, "postgres")?; + 
postgres::init_module(&postgres)?; + m.add_submodule(&postgres)?; + } + + #[cfg(feature = "flight")] + { + let flight = PyModule::new(py, "flight")?; + flight::init_module(&flight)?; + m.add_submodule(&flight)?; + } + + #[cfg(feature = "clickhouse")] + { + let clickhouse = PyModule::new(py, "clickhouse")?; + clickhouse::init_module(&clickhouse)?; + m.add_submodule(&clickhouse)?; + } + + Ok(()) +} diff --git a/python/src/mysql.rs b/python/src/mysql.rs new file mode 100644 index 00000000..69d599ac --- /dev/null +++ b/python/src/mysql.rs @@ -0,0 +1,69 @@ +use std::sync::Arc; + +use datafusion_table_providers::{ + mysql::MySQLTableFactory, + sql::db_connection_pool::{mysqlpool::MySQLConnectionPool, DbConnectionPool}, + util::secrets::to_secret_map, +}; +use pyo3::{prelude::*, types::PyDict}; + +use crate::{ + utils::{pydict_to_hashmap, to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.mysql")] +struct RawMySQLTableFactory { + pool: Arc, + factory: MySQLTableFactory, +} + +#[pymethods] +impl RawMySQLTableFactory { + #[new] + #[pyo3(signature = (params))] + pub fn new(py: Python, params: &Bound<'_, PyDict>) -> PyResult { + let params = to_secret_map(pydict_to_hashmap(params)?); + let pool = + Arc::new(wait_for_future(py, MySQLConnectionPool::new(params)).map_err(to_pyerr)?); + + Ok(Self { + factory: MySQLTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + pub fn tables(&self, py: Python) -> PyResult> { + wait_for_future(py, async { + let conn = self.pool.connect().await.map_err(to_pyerr)?; + let conn_async = conn.as_async().ok_or(to_pyerr( + "Unable to create connection to Mysql db".to_string(), + ))?; + let schemas = conn_async.schemas().await.map_err(to_pyerr)?; + + let mut tables = Vec::default(); + for schema in schemas { + let schema_tables = conn_async.tables(&schema).await.map_err(to_pyerr)?; + tables.extend(schema_tables); + } + + Ok(tables) + }) + } + + pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult { + let table = wait_for_future(py, self.factory.table_provider(table_reference.into())) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/odbc.rs b/python/src/odbc.rs new file mode 100644 index 00000000..04a7e546 --- /dev/null +++ b/python/src/odbc.rs @@ -0,0 +1,66 @@ +use std::{collections::HashMap, sync::Arc}; + +use datafusion_table_providers::{ + odbc::ODBCTableFactory, sql::db_connection_pool::odbcpool::ODBCPool, + util::secrets::to_secret_map, +}; +use pyo3::{prelude::*, types::PyDict}; + +use crate::{ + utils::{to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.odbc")] +struct RawODBCTableFactory { + _pool: Arc, + // TODO: 'static lifetime might be wrong, we want the lifetime to be 'py but it is + // still unclear how to define it. 
+ factory: ODBCTableFactory<'static>, +} + +#[pymethods] +impl RawODBCTableFactory { + #[new] + #[pyo3(signature = (params))] + pub fn new(params: &Bound<'_, PyDict>) -> PyResult { + // Convert Python dict into Rust hashmap, and convert it to secret map + let mut hashmap = HashMap::new(); + for (key, value) in params.iter() { + let key: String = key.extract()?; + let value: String = value.extract()?; + hashmap.insert(key, value); + } + let hashmap = to_secret_map(hashmap); + + let pool = Arc::new(ODBCPool::new(hashmap).map_err(to_pyerr)?); + Ok(Self { + factory: ODBCTableFactory::new(pool.clone()), + _pool: pool, + }) + } + + pub fn tables(&self) -> PyResult> { + // This method is not supported yet because of unimplemented traits in odbcconn. + unimplemented!(); + } + + pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult { + let table = wait_for_future( + py, + self.factory.table_provider(table_reference.into(), None), + ) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/postgres.rs b/python/src/postgres.rs new file mode 100644 index 00000000..086e648a --- /dev/null +++ b/python/src/postgres.rs @@ -0,0 +1,69 @@ +use std::sync::Arc; + +use datafusion_table_providers::{ + postgres::PostgresTableFactory, + sql::db_connection_pool::{postgrespool::PostgresConnectionPool, DbConnectionPool}, + util::secrets::to_secret_map, +}; +use pyo3::{prelude::*, types::PyDict}; + +use crate::{ + utils::{pydict_to_hashmap, to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.postgres")] +struct RawPostgresTableFactory { + pool: Arc, + factory: PostgresTableFactory, +} + +#[pymethods] +impl RawPostgresTableFactory { + #[new] + #[pyo3(signature = (params))] + pub fn new(py: Python, params: &Bound<'_, PyDict>) -> PyResult { + let params = to_secret_map(pydict_to_hashmap(params)?); + let pool = + Arc::new(wait_for_future(py, PostgresConnectionPool::new(params)).map_err(to_pyerr)?); + + Ok(Self { + factory: PostgresTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + pub fn tables(&self, py: Python) -> PyResult> { + wait_for_future(py, async { + let conn = self.pool.connect().await.map_err(to_pyerr)?; + let conn_async = conn.as_async().ok_or(to_pyerr( + "Unable to create connection to Postgres db".to_string(), + ))?; + let schemas = conn_async.schemas().await.map_err(to_pyerr)?; + + let mut tables = Vec::default(); + for schema in schemas { + let schema_tables = conn_async.tables(&schema).await.map_err(to_pyerr)?; + tables.extend(schema_tables); + } + + Ok(tables) + }) + } + + pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult { + let table = wait_for_future(py, self.factory.table_provider(table_reference.into())) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/sqlite.rs b/python/src/sqlite.rs new file mode 100644 index 00000000..c91c6e3f --- /dev/null +++ b/python/src/sqlite.rs @@ -0,0 +1,80 @@ +use std::{sync::Arc, time::Duration}; + +use datafusion_table_providers::{ + sql::db_connection_pool::{ + sqlitepool::{SqliteConnectionPool, SqliteConnectionPoolFactory}, + DbConnectionPool, + }, + sqlite::SqliteTableFactory, +}; +use pyo3::prelude::*; + 
+use crate::{ + utils::{to_pyerr, wait_for_future}, + RawTableProvider, +}; + +#[pyclass(module = "datafusion_table_providers._internal.sqlite")] +struct RawSqliteTableFactory { + pool: Arc, + factory: SqliteTableFactory, +} + +#[pymethods] +impl RawSqliteTableFactory { + #[new] + #[pyo3(signature = (path, mode, busy_timeout_s, attach_databases = None))] + pub fn new( + py: Python, + path: &str, + mode: String, + busy_timeout_s: f64, + attach_databases: Option>, + ) -> PyResult { + let mode = mode.as_str().into(); + let busy_timeout = Duration::from_secs_f64(busy_timeout_s); + let attach_databases = attach_databases.map(|d| d.into_iter().map(Arc::from).collect()); + let factory = SqliteConnectionPoolFactory::new(path, mode, busy_timeout) + .with_databases(attach_databases); + let pool = Arc::new(wait_for_future(py, factory.build()).map_err(to_pyerr)?); + + Ok(Self { + factory: SqliteTableFactory::new(Arc::clone(&pool)), + pool, + }) + } + + pub fn tables(&self, py: Python) -> PyResult> { + wait_for_future(py, async { + let conn = self.pool.connect().await.map_err(to_pyerr)?; + let conn_async = conn.as_async().ok_or(to_pyerr( + "Unable to create connection to sqlite db".to_string(), + ))?; + let schemas = conn_async.schemas().await.map_err(to_pyerr)?; + + let mut tables = Vec::default(); + for schema in schemas { + let schema_tables = conn_async.tables(&schema).await.map_err(to_pyerr)?; + tables.extend(schema_tables); + } + + Ok(tables) + }) + } + + pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult { + let table = wait_for_future(py, self.factory.table_provider(table_reference.into())) + .map_err(to_pyerr)?; + + Ok(RawTableProvider { + table, + supports_pushdown_filters: true, + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + + Ok(()) +} diff --git a/python/src/utils.rs b/python/src/utils.rs new file mode 100644 index 00000000..cd14a8d1 --- /dev/null +++ b/python/src/utils.rs @@ -0,0 +1,29 @@ +use pyo3::{exceptions::PyException, prelude::*}; +use std::future::Future; + +use datafusion_table_providers::sql::db_connection_pool::runtime::execute_in_tokio; +use pyo3::types::PyDict; +use std::collections::HashMap; + +/// Utility to collect rust futures with GIL released +pub fn wait_for_future(py: Python, f: F) -> F::Output +where + F: Future + Send, + F::Output: Send, +{ + py.allow_threads(|| execute_in_tokio(|| f)) +} + +pub fn to_pyerr(err: T) -> PyErr { + PyException::new_err(err.to_string()) +} + +pub fn pydict_to_hashmap(pydict: &Bound<'_, PyDict>) -> PyResult> { + let mut map = HashMap::new(); + for (key, value) in pydict.iter() { + let key_str: String = key.extract()?; + let value_str: String = value.extract()?; + map.insert(key_str, value_str); + } + Ok(map) +} diff --git a/python/uv.lock b/python/uv.lock new file mode 100644 index 00000000..78022796 --- /dev/null +++ b/python/uv.lock @@ -0,0 +1,786 @@ +version = 1 +revision = 1 +requires-python = ">=3.9" +resolution-markers = [ + "python_full_version >= '3.10'", + "python_full_version < '3.10'", +] + +[[package]] +name = "certifi" +version = "2025.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, +] + +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191 }, + { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592 }, + { url = "https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024 }, + { url = "https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188 }, + { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571 }, + { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687 }, + { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211 }, + { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325 }, + { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size = 438784 }, + { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", size = 461564 }, + { url = 
"https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804 }, + { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299 }, + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200 }, + { url = "https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235 }, + { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721 }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242 }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999 }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242 }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604 }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = 
"sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727 }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400 }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, + { url = 
"https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 }, + { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, + { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, + { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, + { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, + { url = "https://files.pythonhosted.org/packages/42/7a/9d086fab7c66bd7c4d0f27c57a1b6b068ced810afc498cc8c49e0088661c/cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", size = 447200 }, + { url = "https://files.pythonhosted.org/packages/da/63/1785ced118ce92a993b0ec9e0d0ac8dc3e5dbfbcaa81135be56c69cabbb6/cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", size = 454565 }, + { url = "https://files.pythonhosted.org/packages/74/06/90b8a44abf3556599cdec107f7290277ae8901a58f75e6fe8f970cd72418/cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", size = 435635 }, + { url = "https://files.pythonhosted.org/packages/bd/62/a1f468e5708a70b1d86ead5bab5520861d9c7eacce4a885ded9faa7729c3/cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", size = 445218 }, + { url = "https://files.pythonhosted.org/packages/5b/95/b34462f3ccb09c2594aa782d90a90b045de4ff1f70148ee79c69d37a0a5a/cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", size = 460486 }, + { url = "https://files.pythonhosted.org/packages/fc/fc/a1e4bebd8d680febd29cf6c8a40067182b64f00c7d105f8f26b5bc54317b/cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", size = 437911 }, + { url = "https://files.pythonhosted.org/packages/e6/c3/21cab7a6154b6a5ea330ae80de386e7665254835b9e98ecc1340b3a7de9a/cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", size = 460632 }, + { url = "https://files.pythonhosted.org/packages/cb/b5/fd9f8b5a84010ca169ee49f4e4ad6f8c05f4e3545b72ee041dbbcb159882/cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", size = 171820 }, + { url = "https://files.pythonhosted.org/packages/8c/52/b08750ce0bce45c143e1b5d7357ee8c55341b52bdef4b0f081af1eb248c2/cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", size = 181290 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/58/5580c1716040bc89206c77d8f74418caf82ce519aae06450393ca73475d1/charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", size = 198013 }, + { url = 
"https://files.pythonhosted.org/packages/d0/11/00341177ae71c6f5159a08168bcb98c6e6d196d372c94511f9f6c9afe0c6/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", size = 141285 }, + { url = "https://files.pythonhosted.org/packages/01/09/11d684ea5819e5a8f5100fb0b38cf8d02b514746607934134d31233e02c8/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", size = 151449 }, + { url = "https://files.pythonhosted.org/packages/08/06/9f5a12939db324d905dc1f70591ae7d7898d030d7662f0d426e2286f68c9/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", size = 143892 }, + { url = "https://files.pythonhosted.org/packages/93/62/5e89cdfe04584cb7f4d36003ffa2936681b03ecc0754f8e969c2becb7e24/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", size = 146123 }, + { url = "https://files.pythonhosted.org/packages/a9/ac/ab729a15c516da2ab70a05f8722ecfccc3f04ed7a18e45c75bbbaa347d61/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", size = 147943 }, + { url = "https://files.pythonhosted.org/packages/03/d2/3f392f23f042615689456e9a274640c1d2e5dd1d52de36ab8f7955f8f050/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", size = 142063 }, + { url = "https://files.pythonhosted.org/packages/f2/e3/e20aae5e1039a2cd9b08d9205f52142329f887f8cf70da3650326670bddf/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", size = 150578 }, + { url = "https://files.pythonhosted.org/packages/8d/af/779ad72a4da0aed925e1139d458adc486e61076d7ecdcc09e610ea8678db/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", size = 153629 }, + { url = "https://files.pythonhosted.org/packages/c2/b6/7aa450b278e7aa92cf7732140bfd8be21f5f29d5bf334ae987c945276639/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", size = 150778 }, + { url = "https://files.pythonhosted.org/packages/39/f4/d9f4f712d0951dcbfd42920d3db81b00dd23b6ab520419626f4023334056/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", size = 146453 }, + { url = "https://files.pythonhosted.org/packages/49/2b/999d0314e4ee0cff3cb83e6bc9aeddd397eeed693edb4facb901eb8fbb69/charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", size = 95479 }, + { url = "https://files.pythonhosted.org/packages/2d/ce/3cbed41cff67e455a386fb5e5dd8906cdda2ed92fbc6297921f2e4419309/charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", size = 102790 }, + { url = 
"https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, + { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, + { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, + { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, + { url = "https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, + { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, + { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, + { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, + { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, + { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, + { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, + { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, + { url = 
"https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = 
"https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = 
"https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, + { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, + { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, + { url = "https://files.pythonhosted.org/packages/54/54/2412a5b093acb17f0222de007cc129ec0e0df198b5ad2ce5699355269dfe/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770", size = 143928 }, + { url = "https://files.pythonhosted.org/packages/5a/6d/e2773862b043dcf8a221342954f375392bb2ce6487bcd9f2c1b34e1d6781/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4", size = 146203 }, + { url = "https://files.pythonhosted.org/packages/b9/f8/ca440ef60d8f8916022859885f231abb07ada3c347c03d63f283bec32ef5/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537", size = 148082 }, + { url = "https://files.pythonhosted.org/packages/04/d2/42fd330901aaa4b805a1097856c2edf5095e260a597f65def493f4b8c833/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496", size = 142053 }, + { url = "https://files.pythonhosted.org/packages/9e/af/3a97a4fa3c53586f1910dadfc916e9c4f35eeada36de4108f5096cb7215f/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78", size = 150625 }, + { url = "https://files.pythonhosted.org/packages/26/ae/23d6041322a3556e4da139663d02fb1b3c59a23ab2e2b56432bd2ad63ded/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7", size = 153549 }, + { url = 
"https://files.pythonhosted.org/packages/94/22/b8f2081c6a77cb20d97e57e0b385b481887aa08019d2459dc2858ed64871/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6", size = 150945 }, + { url = "https://files.pythonhosted.org/packages/c7/0b/c5ec5092747f801b8b093cdf5610e732b809d6cb11f4c51e35fc28d1d389/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294", size = 146595 }, + { url = "https://files.pythonhosted.org/packages/0c/5a/0b59704c38470df6768aa154cc87b1ac7c9bb687990a1559dc8765e8627e/charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5", size = 95453 }, + { url = "https://files.pythonhosted.org/packages/85/2d/a9790237cb4d01a6d57afadc8573c8b73c609ade20b80f4cda30802009ee/charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765", size = 102811 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "cryptography" +version = "44.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/25/4ce80c78963834b8a9fd1cc1266be5ed8d1840785c0f2e1b73b8d128d505/cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0", size = 710807 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361 }, + { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350 }, + { url = "https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572 }, + { url = "https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = 
"sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124 }, + { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122 }, + { url = "https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831 }, + { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583 }, + { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753 }, + { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550 }, + { url = "https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367 }, + { url = "https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843 }, + { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057 }, + { url = "https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789 }, + { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919 }, + { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812 }, + { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571 }, + { url = 
"https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832 }, + { url = "https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719 }, + { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852 }, + { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906 }, + { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572 }, + { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, + { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, + { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, + { url = "https://files.pythonhosted.org/packages/99/10/173be140714d2ebaea8b641ff801cbcb3ef23101a2981cbf08057876f89e/cryptography-44.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb", size = 3396886 }, + { url = "https://files.pythonhosted.org/packages/2f/b4/424ea2d0fce08c24ede307cead3409ecbfc2f566725d4701b9754c0a1174/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41", size = 3892387 }, + { url = "https://files.pythonhosted.org/packages/28/20/8eaa1a4f7c68a1cb15019dbaad59c812d4df4fac6fd5f7b0b9c5177f1edd/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562", size = 4109922 }, + { url = "https://files.pythonhosted.org/packages/11/25/5ed9a17d532c32b3bc81cc294d21a36c772d053981c22bd678396bc4ae30/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5", size = 3895715 }, + { url = "https://files.pythonhosted.org/packages/63/31/2aac03b19c6329b62c45ba4e091f9de0b8f687e1b0cd84f101401bece343/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = 
"sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa", size = 4109876 }, + { url = "https://files.pythonhosted.org/packages/99/ec/6e560908349843718db1a782673f36852952d52a55ab14e46c42c8a7690a/cryptography-44.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d", size = 3131719 }, + { url = "https://files.pythonhosted.org/packages/d6/d7/f30e75a6aa7d0f65031886fa4a1485c2fbfe25a1896953920f6a9cfe2d3b/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d", size = 3887513 }, + { url = "https://files.pythonhosted.org/packages/9c/b4/7a494ce1032323ca9db9a3661894c66e0d7142ad2079a4249303402d8c71/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471", size = 4107432 }, + { url = "https://files.pythonhosted.org/packages/45/f8/6b3ec0bc56123b344a8d2b3264a325646d2dcdbdd9848b5e6f3d37db90b3/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615", size = 3891421 }, + { url = "https://files.pythonhosted.org/packages/57/ff/f3b4b2d007c2a646b0f69440ab06224f9cf37a977a72cdb7b50632174e8a/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390", size = 4107081 }, +] + +[[package]] +name = "datafusion" +version = "46.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyarrow" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/30/a75e7f3f2dbeb7c5de7197ee855d9fae64306dd28b8db0f55c88e39b6150/datafusion-46.0.0.tar.gz", hash = "sha256:e8adb6b987068585bc12f63ea51776919b09376833b2412da6cc8ff77d85c553", size = 151882 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/e9/2d76dbd1df81fc3658a990bf3c70aab7eac507ea391bdda083a7a804121c/datafusion-46.0.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4517b6d2d35fb7d9044519a9ef3388ed504d82cbbaaba4bf3fef1da673c8357a", size = 23212736 }, + { url = "https://files.pythonhosted.org/packages/91/c4/28f7a020776d527fb72f83b8a8f961eb7d3fd847124b3a1bc8fbba7f9cd6/datafusion-46.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:2770a1606a9d0f1f9d64648e3777413bbcff0bee7b9a1ccb067f989e3a1c7b96", size = 21309100 }, + { url = "https://files.pythonhosted.org/packages/7b/db/120aeadfdb145d9e405847707561db5494a20445b09d17962e15b0fe43c2/datafusion-46.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc115441c17036ddd730bb6281ecc88269973691ec51279be173bd3cc3e5d219", size = 25883076 }, + { url = "https://files.pythonhosted.org/packages/ad/51/b65bf0b884b118aa62ab949580c5d8bc27892221f01f21544fb30121f789/datafusion-46.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f40d9eeaa0ac4776bf0cea943228c945ec5bae8032263ad7871c8d82e8a2725a", size = 24956506 }, + { url = "https://files.pythonhosted.org/packages/d9/a1/ab6cead40c9c815b696bdef325ee18f4f167b5d78564585b71f9ec0e4442/datafusion-46.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:2b783b3fd8ee91bedfcdb353a3e700d65b61552a84e072940f78e0cde461bb98", size = 25208291 }, +] + +[[package]] +name = "datafusion-table-providers" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "datafusion" }, +] + +[package.dev-dependencies] +dev = [ + { name = "maturin" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pyarrow" }, + { name = "pygithub" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "ruff" }, + { name = "toml" }, +] + +[package.metadata] +requires-dist = [{ name = "datafusion", specifier = ">=45.0.0" }] + +[package.metadata.requires-dev] +dev = [ + { name = "maturin", specifier = ">=1.8.1" }, + { name = "numpy", specifier = ">1.25.0" }, + { name = "pyarrow", specifier = ">=19.0.1" }, + { name = "pygithub", specifier = "==2.5.0" }, + { name = "pytest", specifier = ">=7.4.4" }, + { name = "pytest-asyncio", specifier = ">=0.23.3" }, + { name = "ruff", specifier = ">=0.9.1" }, + { name = "toml", specifier = ">=0.10.2" }, +] + +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 
6050 }, +] + +[[package]] +name = "maturin" +version = "1.8.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/30/0b/3fd746cf5cfa3c8d7e20ea08c0dbc2c2c765ae051d0fc43d808a38bc9548/maturin-1.8.3.tar.gz", hash = "sha256:304762f86fd53a8031b1bf006d12572a2aa0a5235485031113195cc0152e1e12", size = 199656 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/80/08579d0184ba743345bbd350a3b6419510ff8a4f6e9e671f713160d41fbb/maturin-1.8.3-py3-none-linux_armv6l.whl", hash = "sha256:fa27466b627150123729b2e611f9f9cfade84d24385d72c6877f78c30de30e89", size = 7758366 }, + { url = "https://files.pythonhosted.org/packages/87/1c/00755d28ae277daa828e183c3d118e2923e8b8f0cba4ff708b15d274ac0e/maturin-1.8.3-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:583404d20d7f1d9c8f3c18dcab9014faacabbed6be02da80062c06cd0e279554", size = 15201378 }, + { url = "https://files.pythonhosted.org/packages/58/9f/b8738dc55ba3eb149ad2686d3f9b3c24e44b7baff46cc6baa85e5dc8cf5e/maturin-1.8.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f9ffdac53dfe0089cf19b597410bc552eb34c856ddb41482b243a695e8b549d3", size = 7934215 }, + { url = "https://files.pythonhosted.org/packages/18/68/300f1a279486d6f63b624a76b81f0fba82545d027127c1ca4d5ded0d1b43/maturin-1.8.3-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:7949a4a17637341f84e88f4cbf0c155998780bbb7a145ed735725b907881c0ae", size = 7791270 }, + { url = "https://files.pythonhosted.org/packages/2e/6d/bf1b8bb9a8b1d9adad242b4089794be318446142975762d04f04ffabae40/maturin-1.8.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:11564fac7486313b7baf3aa4e82c20e1b20364aad3fde2ccbc4c07693c0b7e16", size = 8282824 }, + { url = "https://files.pythonhosted.org/packages/18/e9/c601de8699e546774d3f9e761eab374988f88e4ed7006afbb5ff61bb41c0/maturin-1.8.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:8555d8701cdba6c19c4705a22ce4d2a1814efd792f55dc5873262ff903317540", size = 7595855 }, + { url = "https://files.pythonhosted.org/packages/34/b8/82ae650e6b589289f4219a480f2b220bcf3d9391ae9ea02cc7a58ef59cfc/maturin-1.8.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:5b2a513468c1c9b4d1728d4b6d3d044b7c183985ef819c9ef15e373b70d99c7d", size = 7634888 }, + { url = "https://files.pythonhosted.org/packages/ac/4a/4f898a66cf4697267c447a6f240b6cbcc2dcacd3cab89735bfbd44810be1/maturin-1.8.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:2fe8fdff420cfccde127bbc3d8e40835163dcebb6d28c49fffd9aaf1e6ec1090", size = 9810616 }, + { url = "https://files.pythonhosted.org/packages/31/bd/21b0d471e6ade0801f27c33672bca4d563eb1d1e624e534f3bef8a01b1ac/maturin-1.8.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22cd8b6dc490fee99a62590f914f0c04ddad1dd6dbd5c7e933b3f882b1bd78c2", size = 10923701 }, + { url = "https://files.pythonhosted.org/packages/6c/07/85c32f03ed757c0626e2fc5a9d6454988228be96a756991702a2a757691b/maturin-1.8.3-py3-none-win32.whl", hash = "sha256:2427924546c9d1079b1c0c6c5c1d380e1b8187baba4e6342cca81597a0f3d697", size = 7016502 }, + { url = "https://files.pythonhosted.org/packages/d5/62/f92a130a370dd7aca13c316844b82853647f048cfe1594a81f628ab7101f/maturin-1.8.3-py3-none-win_amd64.whl", hash = 
"sha256:85f2b882d8235c1c1cb0a38d382ccd5b3ba0674d99cb548d49df9342cc688e36", size = 7953286 }, + { url = "https://files.pythonhosted.org/packages/04/95/8379140838cd95472de843e982d0bf674e8dbf25a899c44e2f76b15704d9/maturin-1.8.3-py3-none-win_arm64.whl", hash = "sha256:33939aabf9a06a8a14ca6c399d32616c7e574fcca8d4ff6dcd984441051f32fb", size = 6687772 }, +] + +[[package]] +name = "numpy" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/91/3495b3237510f79f5d81f2508f9f13fea78ebfdf07538fc7444badda173d/numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece", size = 21165245 }, + { url = "https://files.pythonhosted.org/packages/05/33/26178c7d437a87082d11019292dce6d3fe6f0e9026b7b2309cbf3e489b1d/numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04", size = 13738540 }, + { url = "https://files.pythonhosted.org/packages/ec/31/cc46e13bf07644efc7a4bf68df2df5fb2a1a88d0cd0da9ddc84dc0033e51/numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66", size = 5300623 }, + { url = "https://files.pythonhosted.org/packages/6e/16/7bfcebf27bb4f9d7ec67332ffebee4d1bf085c84246552d52dbb548600e7/numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b", size = 6901774 }, + { url = "https://files.pythonhosted.org/packages/f9/a3/561c531c0e8bf082c5bef509d00d56f82e0ea7e1e3e3a7fc8fa78742a6e5/numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd", size = 13907081 }, + { url = "https://files.pythonhosted.org/packages/fa/66/f7177ab331876200ac7563a580140643d1179c8b4b6a6b0fc9838de2a9b8/numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318", size = 19523451 }, + { url = "https://files.pythonhosted.org/packages/25/7f/0b209498009ad6453e4efc2c65bcdf0ae08a182b2b7877d7ab38a92dc542/numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8", size = 19927572 }, + { url = "https://files.pythonhosted.org/packages/3e/df/2619393b1e1b565cd2d4c4403bdd979621e2c4dea1f8532754b2598ed63b/numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326", size = 14400722 }, + { url = "https://files.pythonhosted.org/packages/22/ad/77e921b9f256d5da36424ffb711ae79ca3f451ff8489eeca544d0701d74a/numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97", size = 6472170 }, + { url = "https://files.pythonhosted.org/packages/10/05/3442317535028bc29cf0c0dd4c191a4481e8376e9f0db6bcf29703cadae6/numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131", size = 15905558 }, + { url = 
"https://files.pythonhosted.org/packages/8b/cf/034500fb83041aa0286e0fb16e7c76e5c8b67c0711bb6e9e9737a717d5fe/numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448", size = 21169137 }, + { url = "https://files.pythonhosted.org/packages/4a/d9/32de45561811a4b87fbdee23b5797394e3d1504b4a7cf40c10199848893e/numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195", size = 13703552 }, + { url = "https://files.pythonhosted.org/packages/c1/ca/2f384720020c7b244d22508cb7ab23d95f179fcfff33c31a6eeba8d6c512/numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57", size = 5298957 }, + { url = "https://files.pythonhosted.org/packages/0e/78/a3e4f9fb6aa4e6fdca0c5428e8ba039408514388cf62d89651aade838269/numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a", size = 6905573 }, + { url = "https://files.pythonhosted.org/packages/a0/72/cfc3a1beb2caf4efc9d0b38a15fe34025230da27e1c08cc2eb9bfb1c7231/numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669", size = 13914330 }, + { url = "https://files.pythonhosted.org/packages/ba/a8/c17acf65a931ce551fee11b72e8de63bf7e8a6f0e21add4c937c83563538/numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951", size = 19534895 }, + { url = "https://files.pythonhosted.org/packages/ba/86/8767f3d54f6ae0165749f84648da9dcc8cd78ab65d415494962c86fac80f/numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9", size = 19937253 }, + { url = "https://files.pythonhosted.org/packages/df/87/f76450e6e1c14e5bb1eae6836478b1028e096fd02e85c1c37674606ab752/numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15", size = 14414074 }, + { url = "https://files.pythonhosted.org/packages/5c/ca/0f0f328e1e59f73754f06e1adfb909de43726d4f24c6a3f8805f34f2b0fa/numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4", size = 6470640 }, + { url = "https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc", size = 15910230 }, + { url = "https://files.pythonhosted.org/packages/45/40/2e117be60ec50d98fa08c2f8c48e09b3edea93cfcabd5a9ff6925d54b1c2/numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b", size = 20895803 }, + { url = "https://files.pythonhosted.org/packages/46/92/1b8b8dee833f53cef3e0a3f69b2374467789e0bb7399689582314df02651/numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e", size = 13471835 }, + { url = "https://files.pythonhosted.org/packages/7f/19/e2793bde475f1edaea6945be141aef6c8b4c669b90c90a300a8954d08f0a/numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c", size = 5038499 }, + { url = 
"https://files.pythonhosted.org/packages/e3/ff/ddf6dac2ff0dd50a7327bcdba45cb0264d0e96bb44d33324853f781a8f3c/numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c", size = 6633497 }, + { url = "https://files.pythonhosted.org/packages/72/21/67f36eac8e2d2cd652a2e69595a54128297cdcb1ff3931cfc87838874bd4/numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692", size = 13621158 }, + { url = "https://files.pythonhosted.org/packages/39/68/e9f1126d757653496dbc096cb429014347a36b228f5a991dae2c6b6cfd40/numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a", size = 19236173 }, + { url = "https://files.pythonhosted.org/packages/d1/e9/1f5333281e4ebf483ba1c888b1d61ba7e78d7e910fdd8e6499667041cc35/numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c", size = 19634174 }, + { url = "https://files.pythonhosted.org/packages/71/af/a469674070c8d8408384e3012e064299f7a2de540738a8e414dcfd639996/numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded", size = 14099701 }, + { url = "https://files.pythonhosted.org/packages/d0/3d/08ea9f239d0e0e939b6ca52ad403c84a2bce1bde301a8eb4888c1c1543f1/numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5", size = 6174313 }, + { url = "https://files.pythonhosted.org/packages/b2/b5/4ac39baebf1fdb2e72585c8352c56d063b6126be9fc95bd2bb5ef5770c20/numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a", size = 15606179 }, + { url = "https://files.pythonhosted.org/packages/43/c1/41c8f6df3162b0c6ffd4437d729115704bd43363de0090c7f913cfbc2d89/numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c", size = 21169942 }, + { url = "https://files.pythonhosted.org/packages/39/bc/fd298f308dcd232b56a4031fd6ddf11c43f9917fbc937e53762f7b5a3bb1/numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd", size = 13711512 }, + { url = "https://files.pythonhosted.org/packages/96/ff/06d1aa3eeb1c614eda245c1ba4fb88c483bee6520d361641331872ac4b82/numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b", size = 5306976 }, + { url = "https://files.pythonhosted.org/packages/2d/98/121996dcfb10a6087a05e54453e28e58694a7db62c5a5a29cee14c6e047b/numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729", size = 6906494 }, + { url = "https://files.pythonhosted.org/packages/15/31/9dffc70da6b9bbf7968f6551967fc21156207366272c2a40b4ed6008dc9b/numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1", size = 13912596 }, + { url = "https://files.pythonhosted.org/packages/b9/14/78635daab4b07c0930c919d451b8bf8c164774e6a3413aed04a6d95758ce/numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd", size = 19526099 }, + { url = 
"https://files.pythonhosted.org/packages/26/4c/0eeca4614003077f68bfe7aac8b7496f04221865b3a5e7cb230c9d055afd/numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d", size = 19932823 }, + { url = "https://files.pythonhosted.org/packages/f1/46/ea25b98b13dccaebddf1a803f8c748680d972e00507cd9bc6dcdb5aa2ac1/numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d", size = 14404424 }, + { url = "https://files.pythonhosted.org/packages/c8/a6/177dd88d95ecf07e722d21008b1b40e681a929eb9e329684d449c36586b2/numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa", size = 6476809 }, + { url = "https://files.pythonhosted.org/packages/ea/2b/7fc9f4e7ae5b507c1a3a21f0f15ed03e794c1242ea8a242ac158beb56034/numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73", size = 15911314 }, + { url = "https://files.pythonhosted.org/packages/8f/3b/df5a870ac6a3be3a86856ce195ef42eec7ae50d2a202be1f5a4b3b340e14/numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8", size = 21025288 }, + { url = "https://files.pythonhosted.org/packages/2c/97/51af92f18d6f6f2d9ad8b482a99fb74e142d71372da5d834b3a2747a446e/numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4", size = 6762793 }, + { url = "https://files.pythonhosted.org/packages/12/46/de1fbd0c1b5ccaa7f9a005b66761533e2f6a3e560096682683a223631fe9/numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c", size = 19334885 }, + { url = "https://files.pythonhosted.org/packages/cc/dc/d330a6faefd92b446ec0f0dfea4c3207bb1fef3c4771d19cf4543efd2c78/numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385", size = 15828784 }, +] + +[[package]] +name = "numpy" +version = "2.2.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/b2/ce4b867d8cd9c0ee84938ae1e6a6f7926ebf928c9090d036fc3c6a04f946/numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291", size = 20273920 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/4e/3d9e6d16237c2aa5485695f0626cbba82f6481efca2e9132368dea3b885e/numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26", size = 21252117 }, + { url = "https://files.pythonhosted.org/packages/38/e4/db91349d4079cd15c02ff3b4b8882a529991d6aca077db198a2f2a670406/numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a", size = 14424615 }, + { url = "https://files.pythonhosted.org/packages/f8/59/6e5b011f553c37b008bd115c7ba7106a18f372588fbb1b430b7a5d2c41ce/numpy-2.2.5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:19f4718c9012e3baea91a7dba661dcab2451cda2550678dc30d53acb91a7290f", size = 5428691 }, + { url = "https://files.pythonhosted.org/packages/a2/58/d5d70ebdac82b3a6ddf409b3749ca5786636e50fd64d60edb46442af6838/numpy-2.2.5-cp310-cp310-macosx_14_0_x86_64.whl", hash = 
"sha256:eb7fd5b184e5d277afa9ec0ad5e4eb562ecff541e7f60e69ee69c8d59e9aeaba", size = 6965010 }, + { url = "https://files.pythonhosted.org/packages/dc/a8/c290394be346d4e7b48a40baf292626fd96ec56a6398ace4c25d9079bc6a/numpy-2.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6413d48a9be53e183eb06495d8e3b006ef8f87c324af68241bbe7a39e8ff54c3", size = 14369885 }, + { url = "https://files.pythonhosted.org/packages/c2/70/fed13c70aabe7049368553e81d7ca40f305f305800a007a956d7cd2e5476/numpy-2.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7451f92eddf8503c9b8aa4fe6aa7e87fd51a29c2cfc5f7dbd72efde6c65acf57", size = 16418372 }, + { url = "https://files.pythonhosted.org/packages/04/ab/c3c14f25ddaecd6fc58a34858f6a93a21eea6c266ba162fa99f3d0de12ac/numpy-2.2.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0bcb1d057b7571334139129b7f941588f69ce7c4ed15a9d6162b2ea54ded700c", size = 15883173 }, + { url = "https://files.pythonhosted.org/packages/50/18/f53710a19042911c7aca824afe97c203728a34b8cf123e2d94621a12edc3/numpy-2.2.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36ab5b23915887543441efd0417e6a3baa08634308894316f446027611b53bf1", size = 18206881 }, + { url = "https://files.pythonhosted.org/packages/6b/ec/5b407bab82f10c65af5a5fe754728df03f960fd44d27c036b61f7b3ef255/numpy-2.2.5-cp310-cp310-win32.whl", hash = "sha256:422cc684f17bc963da5f59a31530b3936f57c95a29743056ef7a7903a5dbdf88", size = 6609852 }, + { url = "https://files.pythonhosted.org/packages/b6/f5/467ca8675c7e6c567f571d8db942cc10a87588bd9e20a909d8af4171edda/numpy-2.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:e4f0b035d9d0ed519c813ee23e0a733db81ec37d2e9503afbb6e54ccfdee0fa7", size = 12944922 }, + { url = "https://files.pythonhosted.org/packages/f5/fb/e4e4c254ba40e8f0c78218f9e86304628c75b6900509b601c8433bdb5da7/numpy-2.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c42365005c7a6c42436a54d28c43fe0e01ca11eb2ac3cefe796c25a5f98e5e9b", size = 21256475 }, + { url = "https://files.pythonhosted.org/packages/81/32/dd1f7084f5c10b2caad778258fdaeedd7fbd8afcd2510672811e6138dfac/numpy-2.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:498815b96f67dc347e03b719ef49c772589fb74b8ee9ea2c37feae915ad6ebda", size = 14461474 }, + { url = "https://files.pythonhosted.org/packages/0e/65/937cdf238ef6ac54ff749c0f66d9ee2b03646034c205cea9b6c51f2f3ad1/numpy-2.2.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6411f744f7f20081b1b4e7112e0f4c9c5b08f94b9f086e6f0adf3645f85d3a4d", size = 5426875 }, + { url = "https://files.pythonhosted.org/packages/25/17/814515fdd545b07306eaee552b65c765035ea302d17de1b9cb50852d2452/numpy-2.2.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9de6832228f617c9ef45d948ec1cd8949c482238d68b2477e6f642c33a7b0a54", size = 6969176 }, + { url = "https://files.pythonhosted.org/packages/e5/32/a66db7a5c8b5301ec329ab36d0ecca23f5e18907f43dbd593c8ec326d57c/numpy-2.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:369e0d4647c17c9363244f3468f2227d557a74b6781cb62ce57cf3ef5cc7c610", size = 14374850 }, + { url = "https://files.pythonhosted.org/packages/ad/c9/1bf6ada582eebcbe8978f5feb26584cd2b39f94ededeea034ca8f84af8c8/numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:262d23f383170f99cd9191a7c85b9a50970fe9069b2f8ab5d786eca8a675d60b", size = 16430306 }, + { url = 
"https://files.pythonhosted.org/packages/6a/f0/3f741863f29e128f4fcfdb99253cc971406b402b4584663710ee07f5f7eb/numpy-2.2.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa70fdbdc3b169d69e8c59e65c07a1c9351ceb438e627f0fdcd471015cd956be", size = 15884767 }, + { url = "https://files.pythonhosted.org/packages/98/d9/4ccd8fd6410f7bf2d312cbc98892e0e43c2fcdd1deae293aeb0a93b18071/numpy-2.2.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37e32e985f03c06206582a7323ef926b4e78bdaa6915095ef08070471865b906", size = 18219515 }, + { url = "https://files.pythonhosted.org/packages/b1/56/783237243d4395c6dd741cf16eeb1a9035ee3d4310900e6b17e875d1b201/numpy-2.2.5-cp311-cp311-win32.whl", hash = "sha256:f5045039100ed58fa817a6227a356240ea1b9a1bc141018864c306c1a16d4175", size = 6607842 }, + { url = "https://files.pythonhosted.org/packages/98/89/0c93baaf0094bdaaaa0536fe61a27b1dce8a505fa262a865ec142208cfe9/numpy-2.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:b13f04968b46ad705f7c8a80122a42ae8f620536ea38cf4bdd374302926424dd", size = 12949071 }, + { url = "https://files.pythonhosted.org/packages/e2/f7/1fd4ff108cd9d7ef929b8882692e23665dc9c23feecafbb9c6b80f4ec583/numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051", size = 20948633 }, + { url = "https://files.pythonhosted.org/packages/12/03/d443c278348371b20d830af155ff2079acad6a9e60279fac2b41dbbb73d8/numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc", size = 14176123 }, + { url = "https://files.pythonhosted.org/packages/2b/0b/5ca264641d0e7b14393313304da48b225d15d471250376f3fbdb1a2be603/numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e", size = 5163817 }, + { url = "https://files.pythonhosted.org/packages/04/b3/d522672b9e3d28e26e1613de7675b441bbd1eaca75db95680635dd158c67/numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa", size = 6698066 }, + { url = "https://files.pythonhosted.org/packages/a0/93/0f7a75c1ff02d4b76df35079676b3b2719fcdfb39abdf44c8b33f43ef37d/numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571", size = 14087277 }, + { url = "https://files.pythonhosted.org/packages/b0/d9/7c338b923c53d431bc837b5b787052fef9ae68a56fe91e325aac0d48226e/numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073", size = 16135742 }, + { url = "https://files.pythonhosted.org/packages/2d/10/4dec9184a5d74ba9867c6f7d1e9f2e0fb5fe96ff2bf50bb6f342d64f2003/numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8", size = 15581825 }, + { url = "https://files.pythonhosted.org/packages/80/1f/2b6fcd636e848053f5b57712a7d1880b1565eec35a637fdfd0a30d5e738d/numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae", size = 17899600 }, + { url = "https://files.pythonhosted.org/packages/ec/87/36801f4dc2623d76a0a3835975524a84bd2b18fe0f8835d45c8eae2f9ff2/numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb", size = 6312626 }, + { url = 
"https://files.pythonhosted.org/packages/8b/09/4ffb4d6cfe7ca6707336187951992bd8a8b9142cf345d87ab858d2d7636a/numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282", size = 12645715 }, + { url = "https://files.pythonhosted.org/packages/e2/a0/0aa7f0f4509a2e07bd7a509042967c2fab635690d4f48c6c7b3afd4f448c/numpy-2.2.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:059b51b658f4414fff78c6d7b1b4e18283ab5fa56d270ff212d5ba0c561846f4", size = 20935102 }, + { url = "https://files.pythonhosted.org/packages/7e/e4/a6a9f4537542912ec513185396fce52cdd45bdcf3e9d921ab02a93ca5aa9/numpy-2.2.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:47f9ed103af0bc63182609044b0490747e03bd20a67e391192dde119bf43d52f", size = 14191709 }, + { url = "https://files.pythonhosted.org/packages/be/65/72f3186b6050bbfe9c43cb81f9df59ae63603491d36179cf7a7c8d216758/numpy-2.2.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:261a1ef047751bb02f29dfe337230b5882b54521ca121fc7f62668133cb119c9", size = 5149173 }, + { url = "https://files.pythonhosted.org/packages/e5/e9/83e7a9432378dde5802651307ae5e9ea07bb72b416728202218cd4da2801/numpy-2.2.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4520caa3807c1ceb005d125a75e715567806fed67e315cea619d5ec6e75a4191", size = 6684502 }, + { url = "https://files.pythonhosted.org/packages/ea/27/b80da6c762394c8ee516b74c1f686fcd16c8f23b14de57ba0cad7349d1d2/numpy-2.2.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d14b17b9be5f9c9301f43d2e2a4886a33b53f4e6fdf9ca2f4cc60aeeee76372", size = 14084417 }, + { url = "https://files.pythonhosted.org/packages/aa/fc/ebfd32c3e124e6a1043e19c0ab0769818aa69050ce5589b63d05ff185526/numpy-2.2.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba321813a00e508d5421104464510cc962a6f791aa2fca1c97b1e65027da80d", size = 16133807 }, + { url = "https://files.pythonhosted.org/packages/bf/9b/4cc171a0acbe4666f7775cfd21d4eb6bb1d36d3a0431f48a73e9212d2278/numpy-2.2.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4cbdef3ddf777423060c6f81b5694bad2dc9675f110c4b2a60dc0181543fac7", size = 15575611 }, + { url = "https://files.pythonhosted.org/packages/a3/45/40f4135341850df48f8edcf949cf47b523c404b712774f8855a64c96ef29/numpy-2.2.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54088a5a147ab71a8e7fdfd8c3601972751ded0739c6b696ad9cb0343e21ab73", size = 17895747 }, + { url = "https://files.pythonhosted.org/packages/f8/4c/b32a17a46f0ffbde8cc82df6d3daeaf4f552e346df143e1b188a701a8f09/numpy-2.2.5-cp313-cp313-win32.whl", hash = "sha256:c8b82a55ef86a2d8e81b63da85e55f5537d2157165be1cb2ce7cfa57b6aef38b", size = 6309594 }, + { url = "https://files.pythonhosted.org/packages/13/ae/72e6276feb9ef06787365b05915bfdb057d01fceb4a43cb80978e518d79b/numpy-2.2.5-cp313-cp313-win_amd64.whl", hash = "sha256:d8882a829fd779f0f43998e931c466802a77ca1ee0fe25a3abe50278616b1471", size = 12638356 }, + { url = "https://files.pythonhosted.org/packages/79/56/be8b85a9f2adb688e7ded6324e20149a03541d2b3297c3ffc1a73f46dedb/numpy-2.2.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e8b025c351b9f0e8b5436cf28a07fa4ac0204d67b38f01433ac7f9b870fa38c6", size = 20963778 }, + { url = "https://files.pythonhosted.org/packages/ff/77/19c5e62d55bff507a18c3cdff82e94fe174957bad25860a991cac719d3ab/numpy-2.2.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dfa94b6a4374e7851bbb6f35e6ded2120b752b063e6acdd3157e4d2bb922eba", size = 14207279 }, + { url = 
"https://files.pythonhosted.org/packages/75/22/aa11f22dc11ff4ffe4e849d9b63bbe8d4ac6d5fae85ddaa67dfe43be3e76/numpy-2.2.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:97c8425d4e26437e65e1d189d22dff4a079b747ff9c2788057bfb8114ce1e133", size = 5199247 }, + { url = "https://files.pythonhosted.org/packages/4f/6c/12d5e760fc62c08eded0394f62039f5a9857f758312bf01632a81d841459/numpy-2.2.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:352d330048c055ea6db701130abc48a21bec690a8d38f8284e00fab256dc1376", size = 6711087 }, + { url = "https://files.pythonhosted.org/packages/ef/94/ece8280cf4218b2bee5cec9567629e61e51b4be501e5c6840ceb593db945/numpy-2.2.5-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b4c0773b6ada798f51f0f8e30c054d32304ccc6e9c5d93d46cb26f3d385ab19", size = 14059964 }, + { url = "https://files.pythonhosted.org/packages/39/41/c5377dac0514aaeec69115830a39d905b1882819c8e65d97fc60e177e19e/numpy-2.2.5-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55f09e00d4dccd76b179c0f18a44f041e5332fd0e022886ba1c0bbf3ea4a18d0", size = 16121214 }, + { url = "https://files.pythonhosted.org/packages/db/54/3b9f89a943257bc8e187145c6bc0eb8e3d615655f7b14e9b490b053e8149/numpy-2.2.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:02f226baeefa68f7d579e213d0f3493496397d8f1cff5e2b222af274c86a552a", size = 15575788 }, + { url = "https://files.pythonhosted.org/packages/b1/c4/2e407e85df35b29f79945751b8f8e671057a13a376497d7fb2151ba0d290/numpy-2.2.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c26843fd58f65da9491165072da2cccc372530681de481ef670dcc8e27cfb066", size = 17893672 }, + { url = "https://files.pythonhosted.org/packages/29/7e/d0b44e129d038dba453f00d0e29ebd6eaf2f06055d72b95b9947998aca14/numpy-2.2.5-cp313-cp313t-win32.whl", hash = "sha256:1a161c2c79ab30fe4501d5a2bbfe8b162490757cf90b7f05be8b80bc02f7bb8e", size = 6377102 }, + { url = "https://files.pythonhosted.org/packages/63/be/b85e4aa4bf42c6502851b971f1c326d583fcc68227385f92089cf50a7b45/numpy-2.2.5-cp313-cp313t-win_amd64.whl", hash = "sha256:d403c84991b5ad291d3809bace5e85f4bbf44a04bdc9a88ed2bb1807b3360bb8", size = 12750096 }, + { url = "https://files.pythonhosted.org/packages/35/e4/5ef5ef1d4308f96961198b2323bfc7c7afb0ccc0d623b01c79bc87ab496d/numpy-2.2.5-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b4ea7e1cff6784e58fe281ce7e7f05036b3e1c89c6f922a6bfbc0a7e8768adbe", size = 21083404 }, + { url = "https://files.pythonhosted.org/packages/a3/5f/bde9238e8e977652a16a4b114ed8aa8bb093d718c706eeecb5f7bfa59572/numpy-2.2.5-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d7543263084a85fbc09c704b515395398d31d6395518446237eac219eab9e55e", size = 6828578 }, + { url = "https://files.pythonhosted.org/packages/ef/7f/813f51ed86e559ab2afb6a6f33aa6baf8a560097e25e4882a938986c76c2/numpy-2.2.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0255732338c4fdd00996c0421884ea8a3651eea555c3a56b84892b66f696eb70", size = 16234796 }, + { url = "https://files.pythonhosted.org/packages/68/67/1175790323026d3337cc285cc9c50eca637d70472b5e622529df74bb8f37/numpy-2.2.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2e3bdadaba0e040d1e7ab39db73e0afe2c74ae277f5614dad53eadbecbbb169", size = 12859001 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = 
"sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "pyarrow" +version = "19.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/09/a9046344212690f0632b9c709f9bf18506522feb333c894d0de81d62341a/pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e", size = 1129437 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/01/b23b514d86b839956238d3f8ef206fd2728eee87ff1b8ce150a5678d9721/pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69", size = 30688914 }, + { url = "https://files.pythonhosted.org/packages/c6/68/218ff7cf4a0652a933e5f2ed11274f724dd43b9813cb18dd72c0a35226a2/pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec", size = 32102866 }, + { url = "https://files.pythonhosted.org/packages/98/01/c295050d183014f4a2eb796d7d2bbfa04b6cccde7258bb68aacf6f18779b/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89", size = 41147682 }, + { url = "https://files.pythonhosted.org/packages/40/17/a6c3db0b5f3678f33bbb552d2acbc16def67f89a72955b67b0109af23eb0/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a", size = 42179192 }, + { url = "https://files.pythonhosted.org/packages/cf/75/c7c8e599300d8cebb6cb339014800e1c720c9db2a3fcb66aa64ec84bac72/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a", size = 40517272 }, + { url = "https://files.pythonhosted.org/packages/ef/c9/68ab123ee1528699c4d5055f645ecd1dd68ff93e4699527249d02f55afeb/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608", size = 42069036 }, + { url = "https://files.pythonhosted.org/packages/54/e3/d5cfd7654084e6c0d9c3ce949e5d9e0ccad569ae1e2d5a68a3ec03b2be89/pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866", size = 25277951 }, + { url = "https://files.pythonhosted.org/packages/a0/55/f1a8d838ec07fe3ca53edbe76f782df7b9aafd4417080eebf0b42aab0c52/pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90", size = 30713987 }, + { url = 
"https://files.pythonhosted.org/packages/13/12/428861540bb54c98a140ae858a11f71d041ef9e501e6b7eb965ca7909505/pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00", size = 32135613 }, + { url = "https://files.pythonhosted.org/packages/2f/8a/23d7cc5ae2066c6c736bce1db8ea7bc9ac3ef97ac7e1c1667706c764d2d9/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae", size = 41149147 }, + { url = "https://files.pythonhosted.org/packages/a2/7a/845d151bb81a892dfb368bf11db584cf8b216963ccce40a5cf50a2492a18/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5", size = 42178045 }, + { url = "https://files.pythonhosted.org/packages/a7/31/e7282d79a70816132cf6cae7e378adfccce9ae10352d21c2fecf9d9756dd/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3", size = 40532998 }, + { url = "https://files.pythonhosted.org/packages/b8/82/20f3c290d6e705e2ee9c1fa1d5a0869365ee477e1788073d8b548da8b64c/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6", size = 42084055 }, + { url = "https://files.pythonhosted.org/packages/ff/77/e62aebd343238863f2c9f080ad2ef6ace25c919c6ab383436b5b81cbeef7/pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466", size = 25283133 }, + { url = "https://files.pythonhosted.org/packages/78/b4/94e828704b050e723f67d67c3535cf7076c7432cd4cf046e4bb3b96a9c9d/pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b", size = 30670749 }, + { url = "https://files.pythonhosted.org/packages/7e/3b/4692965e04bb1df55e2c314c4296f1eb12b4f3052d4cf43d29e076aedf66/pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294", size = 32128007 }, + { url = "https://files.pythonhosted.org/packages/22/f7/2239af706252c6582a5635c35caa17cb4d401cd74a87821ef702e3888957/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14", size = 41144566 }, + { url = "https://files.pythonhosted.org/packages/fb/e3/c9661b2b2849cfefddd9fd65b64e093594b231b472de08ff658f76c732b2/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34", size = 42202991 }, + { url = "https://files.pythonhosted.org/packages/fe/4f/a2c0ed309167ef436674782dfee4a124570ba64299c551e38d3fdaf0a17b/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6", size = 40507986 }, + { url = "https://files.pythonhosted.org/packages/27/2e/29bb28a7102a6f71026a9d70d1d61df926887e36ec797f2e6acfd2dd3867/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832", size = 42087026 }, + { url = "https://files.pythonhosted.org/packages/16/33/2a67c0f783251106aeeee516f4806161e7b481f7d744d0d643d2f30230a5/pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960", size = 25250108 }, + { url = "https://files.pythonhosted.org/packages/2b/8d/275c58d4b00781bd36579501a259eacc5c6dfb369be4ddeb672ceb551d2d/pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c", size = 30653552 }, + { url = "https://files.pythonhosted.org/packages/a0/9e/e6aca5cc4ef0c7aec5f8db93feb0bde08dbad8c56b9014216205d271101b/pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae", size = 32103413 }, + { url = "https://files.pythonhosted.org/packages/6a/fa/a7033f66e5d4f1308c7eb0dfcd2ccd70f881724eb6fd1776657fdf65458f/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4", size = 41134869 }, + { url = "https://files.pythonhosted.org/packages/2d/92/34d2569be8e7abdc9d145c98dc410db0071ac579b92ebc30da35f500d630/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2", size = 42192626 }, + { url = "https://files.pythonhosted.org/packages/0a/1f/80c617b1084fc833804dc3309aa9d8daacd46f9ec8d736df733f15aebe2c/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6", size = 40496708 }, + { url = "https://files.pythonhosted.org/packages/e6/90/83698fcecf939a611c8d9a78e38e7fed7792dcc4317e29e72cf8135526fb/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136", size = 42075728 }, + { url = "https://files.pythonhosted.org/packages/40/49/2325f5c9e7a1c125c01ba0c509d400b152c972a47958768e4e35e04d13d8/pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef", size = 25242568 }, + { url = "https://files.pythonhosted.org/packages/3f/72/135088d995a759d4d916ec4824cb19e066585b4909ebad4ab196177aa825/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0", size = 30702371 }, + { url = "https://files.pythonhosted.org/packages/2e/01/00beeebd33d6bac701f20816a29d2018eba463616bbc07397fdf99ac4ce3/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9", size = 32116046 }, + { url = "https://files.pythonhosted.org/packages/1f/c9/23b1ea718dfe967cbd986d16cf2a31fe59d015874258baae16d7ea0ccabc/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3", size = 41091183 }, + { url = "https://files.pythonhosted.org/packages/3a/d4/b4a3aa781a2c715520aa8ab4fe2e7fa49d33a1d4e71c8fc6ab7b5de7a3f8/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6", size = 42171896 }, + { url = "https://files.pythonhosted.org/packages/23/1b/716d4cd5a3cbc387c6e6745d2704c4b46654ba2668260d25c402626c5ddb/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a", size = 40464851 }, + { url = 
"https://files.pythonhosted.org/packages/ed/bd/54907846383dcc7ee28772d7e646f6c34276a17da740002a5cefe90f04f7/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8", size = 42085744 }, + { url = "https://files.pythonhosted.org/packages/16/26/0ec396ebe98adefaffc0fff8e0dc14c8912e61093226284cf4b76faffd22/pyarrow-19.0.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b9766a47a9cb56fefe95cb27f535038b5a195707a08bf61b180e642324963b46", size = 30701112 }, + { url = "https://files.pythonhosted.org/packages/ba/10/c35d96686bf7f13e55bb87f06fe06e7d95533c271ef7f9a5a76e26b16fc2/pyarrow-19.0.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6c5941c1aac89a6c2f2b16cd64fe76bcdb94b2b1e99ca6459de4e6f07638d755", size = 32117180 }, + { url = "https://files.pythonhosted.org/packages/8c/0d/81881a55302b6847ea2ea187517faa039c219d80b55050904e354c2eddde/pyarrow-19.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd44d66093a239358d07c42a91eebf5015aa54fccba959db899f932218ac9cc8", size = 41161334 }, + { url = "https://files.pythonhosted.org/packages/af/17/ea60a07ec6f6bb0740f11715e0d22ab8fdfcc94bc729832321f498370d75/pyarrow-19.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:335d170e050bcc7da867a1ed8ffb8b44c57aaa6e0843b156a501298657b1e972", size = 42190375 }, + { url = "https://files.pythonhosted.org/packages/f2/87/4ef05a088b18082cde4950bdfca752dd31effb3ec201b8026e4816d0f3fa/pyarrow-19.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:1c7556165bd38cf0cd992df2636f8bcdd2d4b26916c6b7e646101aff3c16f76f", size = 40530649 }, + { url = "https://files.pythonhosted.org/packages/59/1e/9fb9a66a64eae4ff332a8f149d803d8c6c556714803d20d54ed2e9524a3b/pyarrow-19.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:699799f9c80bebcf1da0983ba86d7f289c5a2a5c04b945e2f2bcf7e874a91911", size = 42081576 }, + { url = "https://files.pythonhosted.org/packages/1b/ee/c110d8da8bdde8e832ccf1ff90be747cb684874e2dc8acf26840058b0c32/pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429", size = 25465593 }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + +[[package]] +name = "pygithub" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/05/bfbdbbc5d8aafd8dae9b3b6877edca561fccd8528ef5edc4e7b6d23721b5/PyGithub-2.5.0-py3-none-any.whl", hash = 
"sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2", size = 375935 }, +] + +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = 
"https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + +[[package]] +name = "pytest" +version = "8.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, +] + +[[package]] +name = "pytest-asyncio" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/c4/453c52c659521066969523e87d85d54139bbd17b78f09532fb8eb8cdb58e/pytest_asyncio-0.26.0.tar.gz", hash = "sha256:c4df2a697648241ff39e7f0e4a73050b03f123f760673956cf0d72a4990e312f", size = 54156 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "ruff" +version = "0.11.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/11/bcef6784c7e5d200b8a1f5c2ddf53e5da0efec37e6e5a44d163fb97e04ba/ruff-0.11.6.tar.gz", hash = "sha256:bec8bcc3ac228a45ccc811e45f7eb61b950dbf4cf31a67fa89352574b01c7d79", size = 4010053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/1f/8848b625100ebcc8740c8bac5b5dd8ba97dd4ee210970e98832092c1635b/ruff-0.11.6-py3-none-linux_armv6l.whl", hash = "sha256:d84dcbe74cf9356d1bdb4a78cf74fd47c740bf7bdeb7529068f69b08272239a1", size = 10248105 }, + { url = "https://files.pythonhosted.org/packages/e0/47/c44036e70c6cc11e6ee24399c2a1e1f1e99be5152bd7dff0190e4b325b76/ruff-0.11.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9bc583628e1096148011a5d51ff3c836f51899e61112e03e5f2b1573a9b726de", size = 11001494 }, + { url = 
"https://files.pythonhosted.org/packages/ed/5b/170444061650202d84d316e8f112de02d092bff71fafe060d3542f5bc5df/ruff-0.11.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2959049faeb5ba5e3b378709e9d1bf0cab06528b306b9dd6ebd2a312127964a", size = 10352151 }, + { url = "https://files.pythonhosted.org/packages/ff/91/f02839fb3787c678e112c8865f2c3e87cfe1744dcc96ff9fc56cfb97dda2/ruff-0.11.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63c5d4e30d9d0de7fedbfb3e9e20d134b73a30c1e74b596f40f0629d5c28a193", size = 10541951 }, + { url = "https://files.pythonhosted.org/packages/9e/f3/c09933306096ff7a08abede3cc2534d6fcf5529ccd26504c16bf363989b5/ruff-0.11.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a4b9a4e1439f7d0a091c6763a100cef8fbdc10d68593df6f3cfa5abdd9246e", size = 10079195 }, + { url = "https://files.pythonhosted.org/packages/e0/0d/a87f8933fccbc0d8c653cfbf44bedda69c9582ba09210a309c066794e2ee/ruff-0.11.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5edf270223dd622218256569636dc3e708c2cb989242262fe378609eccf1308", size = 11698918 }, + { url = "https://files.pythonhosted.org/packages/52/7d/8eac0bd083ea8a0b55b7e4628428203441ca68cd55e0b67c135a4bc6e309/ruff-0.11.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f55844e818206a9dd31ff27f91385afb538067e2dc0beb05f82c293ab84f7d55", size = 12319426 }, + { url = "https://files.pythonhosted.org/packages/c2/dc/d0c17d875662d0c86fadcf4ca014ab2001f867621b793d5d7eef01b9dcce/ruff-0.11.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d8f782286c5ff562e4e00344f954b9320026d8e3fae2ba9e6948443fafd9ffc", size = 11791012 }, + { url = "https://files.pythonhosted.org/packages/f9/f3/81a1aea17f1065449a72509fc7ccc3659cf93148b136ff2a8291c4bc3ef1/ruff-0.11.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:01c63ba219514271cee955cd0adc26a4083df1956d57847978383b0e50ffd7d2", size = 13949947 }, + { url = "https://files.pythonhosted.org/packages/61/9f/a3e34de425a668284e7024ee6fd41f452f6fa9d817f1f3495b46e5e3a407/ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15adac20ef2ca296dd3d8e2bedc6202ea6de81c091a74661c3666e5c4c223ff6", size = 11471753 }, + { url = "https://files.pythonhosted.org/packages/df/c5/4a57a86d12542c0f6e2744f262257b2aa5a3783098ec14e40f3e4b3a354a/ruff-0.11.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4dd6b09e98144ad7aec026f5588e493c65057d1b387dd937d7787baa531d9bc2", size = 10417121 }, + { url = "https://files.pythonhosted.org/packages/58/3f/a3b4346dff07ef5b862e2ba06d98fcbf71f66f04cf01d375e871382b5e4b/ruff-0.11.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:45b2e1d6c0eed89c248d024ea95074d0e09988d8e7b1dad8d3ab9a67017a5b03", size = 10073829 }, + { url = "https://files.pythonhosted.org/packages/93/cc/7ed02e0b86a649216b845b3ac66ed55d8aa86f5898c5f1691797f408fcb9/ruff-0.11.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:bd40de4115b2ec4850302f1a1d8067f42e70b4990b68838ccb9ccd9f110c5e8b", size = 11076108 }, + { url = "https://files.pythonhosted.org/packages/39/5e/5b09840fef0eff1a6fa1dea6296c07d09c17cb6fb94ed5593aa591b50460/ruff-0.11.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:77cda2dfbac1ab73aef5e514c4cbfc4ec1fbef4b84a44c736cc26f61b3814cd9", size = 11512366 }, + { url = "https://files.pythonhosted.org/packages/6f/4c/1cd5a84a412d3626335ae69f5f9de2bb554eea0faf46deb1f0cb48534042/ruff-0.11.6-py3-none-win32.whl", hash = 
"sha256:5151a871554be3036cd6e51d0ec6eef56334d74dfe1702de717a995ee3d5b287", size = 10485900 }, + { url = "https://files.pythonhosted.org/packages/42/46/8997872bc44d43df986491c18d4418f1caff03bc47b7f381261d62c23442/ruff-0.11.6-py3-none-win_amd64.whl", hash = "sha256:cce85721d09c51f3b782c331b0abd07e9d7d5f775840379c640606d3159cae0e", size = 11558592 }, + { url = "https://files.pythonhosted.org/packages/d7/6a/65fecd51a9ca19e1477c3879a7fda24f8904174d1275b419422ac00f6eee/ruff-0.11.6-py3-none-win_arm64.whl", hash = "sha256:3567ba0d07fb170b1b48d944715e3294b77f5b7679e8ba258199a250383ccb79", size = 10682766 }, +] + +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = 
"https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = 
"https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = 
"sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, +] + +[[package]] +name = "urllib3" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, +] + +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486 }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777 }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314 }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947 }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778 }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716 }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548 }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334 }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427 }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774 }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = 
"https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, + { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721 }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899 }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222 }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707 }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685 }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567 }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672 }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865 }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = 
"https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, + { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/cf/cb/7a07b51762dcd59bdbe07aa97f87b3169766cadf240f48d1cbe70a1be9db/wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9", size = 83050 }, + { url = "https://files.pythonhosted.org/packages/a5/51/a42757dd41032afd6d8037617aa3bc6803ba971850733b24dfb7d5c627c4/wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f", size = 74718 }, + { url = "https://files.pythonhosted.org/packages/bf/bb/d552bfe47db02fcfc950fc563073a33500f8108efa5f7b41db2f83a59028/wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b", size = 82590 }, + { url = "https://files.pythonhosted.org/packages/77/99/77b06b3c3c410dbae411105bf22496facf03a5496bfaca8fbcf9da381889/wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f", size = 81462 }, + { url = "https://files.pythonhosted.org/packages/2d/21/cf0bd85ae66f92600829ea1de8e1da778e5e9f6e574ccbe74b66db0d95db/wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8", size = 74309 }, + { url = "https://files.pythonhosted.org/packages/6d/16/112d25e9092398a0dd6fec50ab7ac1b775a0c19b428f049785096067ada9/wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9", size = 81081 }, + { url = "https://files.pythonhosted.org/packages/2b/49/364a615a0cc0872685646c495c7172e4fc7bf1959e3b12a1807a03014e05/wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb", size = 36423 }, + { url = "https://files.pythonhosted.org/packages/00/ad/5d2c1b34ba3202cd833d9221833e74d6500ce66730974993a8dc9a94fb8c/wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb", size = 38772 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +]