diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9dd627b01abed..51111543b4bb2 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,9 +5,7 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ && apt-get purge -y imagemagick imagemagick-6-common # Add protoc -# https://datafusion.apache.org/contributor-guide/getting_started.html#protoc-installation -RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v25.1/protoc-25.1-linux-x86_64.zip \ - && unzip protoc-25.1-linux-x86_64.zip -d $HOME/.local \ - && rm protoc-25.1-linux-x86_64.zip - -ENV PATH="$PATH:$HOME/.local/bin" \ No newline at end of file +# https://datafusion.apache.org/contributor-guide/development_environment.html#protoc-installation +RUN apt-get update \ + && apt-get install -y --no-install-recommends protobuf-compiler libprotobuf-dev \ + && rm -rf /var/lib/apt/lists/* diff --git a/.github/actions/setup-macos-aarch64-builder/action.yaml b/.github/actions/setup-macos-aarch64-builder/action.yaml index 288799a284b01..06fddebe9507d 100644 --- a/.github/actions/setup-macos-aarch64-builder/action.yaml +++ b/.github/actions/setup-macos-aarch64-builder/action.yaml @@ -45,5 +45,7 @@ runs: rustup component add rustfmt - name: Setup rust cache uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.ref_name == 'main' }} - name: Configure rust runtime env uses: ./.github/actions/setup-rust-runtime diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ecb25483ce07e..eacec0abee55d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -51,6 +51,11 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + shared-key: "amd-ci-check" # this job uses its own cache because check has a separate cache and we need it to be fast as it blocks other jobs + save-if: ${{ github.ref_name == 'main' }} - name: Prepare cargo
build run: | # Adding `--locked` here to assert that the `Cargo.lock` file is up to @@ -99,6 +104,11 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + save-if: false # set in linux-test + shared-key: "amd-ci" - name: Check datafusion-substrait (default features) run: cargo check --profile ci --all-targets -p datafusion-substrait # @@ -162,6 +172,11 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + save-if: false # set in linux-test + shared-key: "amd-ci" - name: Check datafusion (default features) run: cargo check --profile ci --all-targets -p datafusion # @@ -203,6 +218,8 @@ jobs: run: cargo check --profile ci --no-default-features -p datafusion --features=string_expressions - name: Check datafusion (unicode_expressions) run: cargo check --profile ci --no-default-features -p datafusion --features=unicode_expressions + - name: Check parquet encryption (parquet_encryption) + run: cargo check --profile ci --no-default-features -p datafusion --features=parquet_encryption # Check datafusion-functions crate features # @@ -247,15 +264,22 @@ jobs: name: cargo test (amd64) needs: linux-build-lib runs-on: ubuntu-latest + container: + image: amd64/rust steps: - uses: actions/checkout@v4 with: submodules: true fetch-depth: 1 - name: Setup Rust toolchain - run: rustup toolchain install stable - - name: Install Protobuf Compiler - run: sudo apt-get install -y protobuf-compiler + uses: ./.github/actions/setup-builder + with: + rust-version: stable + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.ref_name == 'main' }} + shared-key: "amd-ci" - name: Run tests (excluding doctests and datafusion-cli) env: RUST_BACKTRACE: 1 @@ -279,6 +303,10 @@ jobs: name: cargo test datafusion-cli (amd64) needs: linux-build-lib runs-on: ubuntu-latest + # should be 
uncommented once https://github.com/apache/datafusion/pull/16644 is merged + # and cache should be added + # container: + # image: amd64/rust steps: - uses: actions/checkout@v4 with: @@ -286,18 +314,6 @@ jobs: fetch-depth: 1 - name: Setup Rust toolchain run: rustup toolchain install stable - - name: Setup Minio - S3-compatible storage - run: | - docker run -d --name minio-container \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/datafusion/core/tests/data:/source quay.io/minio/minio \ - server /data - docker exec minio-container /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" - name: Run tests (excluding doctests) env: RUST_BACKTRACE: 1 @@ -309,9 +325,6 @@ jobs: run: cargo test --profile ci -p datafusion-cli --lib --tests --bins - name: Verify Working Directory Clean run: git diff --exit-code - - name: Minio Output - if: ${{ !cancelled() }} - run: docker logs minio-container linux-test-example: @@ -329,6 +342,11 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.ref_name == 'main' }} + shared-key: "amd-ci-linux-test-example" - name: Run examples run: | # test datafusion-sql examples @@ -655,6 +673,11 @@ jobs: rust-version: stable - name: Install Clippy run: rustup component add clippy + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.ref_name == 'main' }} + shared-key: "amd-ci-clippy" - name: Run clippy run: ci/scripts/rust_clippy.sh @@ -733,10 +756,15 @@ jobs: # `rust-version` key of `Cargo.toml`. # # To reproduce: - # 1. Install the version of Rust that is failing. Example: - # rustup install 1.80.1 - # 2. Run the command that failed with that version. 
Example: - # cargo +1.80.1 check -p datafusion + # 1. Install the version of Rust that is failing. + # 2. Run the command that failed with that version. + # + # Example: + # # MSRV looks like "1.80.0" and is specified in Cargo.toml. We can read the value with the following command: + # msrv="$(cargo metadata --format-version=1 | jq '.packages[] | select( .name == "datafusion" ) | .rust_version' -r)" + # echo "MSRV: ${msrv}" + # rustup install "${msrv}" + # cargo "+${msrv}" check # # To resolve, either: # 1. Change your code to use older Rust features, diff --git a/Cargo.lock b/Cargo.lock index bf0d19db84134..200a2bd5bfcf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "abi_stable" @@ -265,7 +265,7 @@ dependencies = [ "arrow-select", "arrow-string", "half", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -623,9 +623,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.8.1" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c18d005c70d2b9c0c1ea8876c039db0ec7fb71164d25c73ccea21bf41fd02171" +checksum = "c0baa720ebadea158c5bda642ac444a2af0cdf7bb66b46d1e4533de5d1f449d0" dependencies = [ "aws-credential-types", "aws-runtime", @@ -653,9 +653,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.3" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "687bc16bc431a8533fe0097c7f0182874767f920989d7260950172ae8e3c4465" +checksum = "b68c2194a190e1efc999612792e25b1ab3abfefe4306494efaaabc25933c0cbe" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -688,9 +688,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.8" +version = "1.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4f6c68419d8ba16d9a7463671593c54f81ba58cab466e9b759418da606dcc2e2" +checksum = "b2090e664216c78e766b6bac10fe74d2f451c02441d43484cd76ac9a295075f7" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -712,9 +712,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.74.0" +version = "1.77.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a69de9c1b9272da2872af60c7402683e7f45c06267735b4332deacb203239b" +checksum = "18f2f37fea82468fe3f5a059542c05392ef680c4f7f00e0db02df8b6e5c7d0c6" dependencies = [ "aws-credential-types", "aws-runtime", @@ -734,9 +734,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.75.0" +version = "1.78.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b161d836fac72bdd5ac1a4cd1cdc38ab888c7af26cfd95f661be4409505e63" +checksum = "ecb4f6eada20e0193450cd48b12ed05e1e66baac86f39160191651b932f2b7d9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -756,9 +756,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.76.0" +version = "1.79.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb1cd79a3412751a341a28e2cd0d6fa4345241976da427b075a0c0cd5409f886" +checksum = "317377afba3498fca4948c5d32b399ef9a5ad35561a1e8a6f2ac7273dabf802d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -812,9 +812,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.1" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99335bec6cdc50a346fda1437f9fefe33abf8c99060739a546a16457f2862ca9" +checksum = "43c82ba4cab184ea61f6edaafc1072aad3c2a17dcf4c0fce19ac5694b90d8b5f" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -832,9 +832,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.0.2" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7e44697a9bded898dcd0b1cb997430d949b87f4f8940d91023ae9062bf218250" +checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -883,9 +883,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.8.3" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14302f06d1d5b7d333fd819943075b13d27c7700b414f574c3c35859bfb55d5e" +checksum = "660f70d9d8af6876b4c9aa8dcb0dbaf0f89b04ee9a4455bea1b4ba03b15f26f6" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -907,9 +907,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.8.1" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8531b6d8882fd8f48f82a9754e682e29dd44cff27154af51fa3eb730f59efb" +checksum = "937a49ecf061895fca4a6dd8e864208ed9be7546c0527d04bc07d502ec5fba1c" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -956,9 +956,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.7" +version = "1.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a322fec39e4df22777ed3ad8ea868ac2f94cd15e1a55f6ee8d8d6305057689a" +checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1392,23 +1392,12 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" 
-dependencies = [ - "parse-zoneinfo", - "phf_codegen", + "phf 0.12.1", ] [[package]] @@ -1462,9 +1451,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" dependencies = [ "clap_builder", "clap_derive", @@ -1472,9 +1461,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" dependencies = [ "anstream", "anstyle", @@ -1484,9 +1473,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -1656,7 +1645,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.40", + "clap 4.5.41", "criterion-plot", "futures", "is-terminal", @@ -1829,7 +1818,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1872,6 +1861,8 @@ dependencies = [ "env_logger", "flate2", "futures", + "glob", + "hex", "insta", "itertools 0.14.0", "log", @@ -1880,7 +1871,7 @@ dependencies = [ "parking_lot", "parquet", "paste", - "rand 0.9.1", + "rand 0.9.2", "rand_distr", "regex", "rstest", @@ -1899,7 +1890,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion", @@ -1911,7 +1902,7 @@ dependencies = [ "mimalloc", 
"object_store", "parquet", - "rand 0.9.1", + "rand 0.9.2", "serde", "serde_json", "snmalloc-rs", @@ -1923,7 +1914,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -1947,7 +1938,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -1968,14 +1959,14 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "assert_cmd", "async-trait", "aws-config", "aws-credential-types", - "clap 4.5.40", + "clap 4.5.41", "ctor", "datafusion", "dirs", @@ -1992,13 +1983,15 @@ dependencies = [ "regex", "rstest", "rustyline", + "testcontainers", + "testcontainers-modules", "tokio", "url", ] [[package]] name = "datafusion-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2017,7 +2010,7 @@ dependencies = [ "parquet", "paste", "pyo3", - "rand 0.9.1", + "rand 0.9.2", "recursive", "sqlparser", "tokio", @@ -2026,7 +2019,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.0" +version = "49.0.0" dependencies = [ "futures", "log", @@ -2035,7 +2028,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-compression", @@ -2059,7 +2052,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand 0.9.1", + "rand 0.9.2", "tempfile", "tokio", "tokio-util", @@ -2070,7 +2063,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "48.0.0" +version = "49.0.0" dependencies = [ "apache-avro", "arrow", @@ -2095,7 +2088,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2118,7 +2111,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version 
= "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2141,7 +2134,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2161,22 +2154,23 @@ dependencies = [ "datafusion-pruning", "datafusion-session", "futures", + "hex", "itertools 0.14.0", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.1", + "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "48.0.0" +version = "49.0.0" [[package]] name = "datafusion-examples" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2194,6 +2188,7 @@ dependencies = [ "nix", "object_store", "prost", + "serde_json", "tempfile", "test-utils", "tokio", @@ -2206,7 +2201,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "chrono", @@ -2218,14 +2213,14 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.1", + "rand 0.9.2", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2248,7 +2243,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2259,7 +2254,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "48.0.0" +version = "49.0.0" dependencies = [ "abi_stable", "arrow", @@ -2280,7 +2275,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2299,7 +2294,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand 0.9.1", + "rand 0.9.2", "regex", "sha2", "tokio", @@ -2309,7 +2304,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2325,12 
+2320,12 @@ dependencies = [ "half", "log", "paste", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2338,12 +2333,12 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] name = "datafusion-functions-nested" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2360,12 +2355,12 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] name = "datafusion-functions-table" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2379,7 +2374,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2395,7 +2390,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2403,7 +2398,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "48.0.0" +version = "49.0.0" dependencies = [ "datafusion-expr", "quote", @@ -2412,7 +2407,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2439,7 +2434,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2458,13 +2453,13 @@ dependencies = [ "log", "paste", "petgraph 0.8.2", - "rand 0.9.1", + "rand 0.9.2", "rstest", ] [[package]] name = "datafusion-physical-expr-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2476,7 +2471,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "48.0.0" 
+version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2497,7 +2492,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "48.0.0" +version = "49.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2524,7 +2519,7 @@ dependencies = [ "log", "parking_lot", "pin-project-lite", - "rand 0.9.1", + "rand 0.9.2", "rstest", "rstest_reuse", "tempfile", @@ -2533,7 +2528,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "chrono", @@ -2547,6 +2542,7 @@ dependencies = [ "doc-comment", "object_store", "pbjson", + "pretty_assertions", "prost", "serde", "serde_json", @@ -2555,7 +2551,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2568,7 +2564,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "arrow-schema", @@ -2587,7 +2583,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", @@ -2609,9 +2605,11 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", + "chrono", + "criterion", "datafusion-catalog", "datafusion-common", "datafusion-execution", @@ -2619,11 +2617,12 @@ dependencies = [ "datafusion-functions", "datafusion-macros", "log", + "rand 0.9.2", ] [[package]] name = "datafusion-sql" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2647,14 +2646,14 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "48.0.0" +version = "49.0.0" dependencies = [ "arrow", "async-trait", "bigdecimal", "bytes", "chrono", - "clap 4.5.40", + "clap 4.5.41", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2680,7 +2679,7 @@ dependencies = [ [[package]] name = 
"datafusion-substrait" -version = "48.0.0" +version = "49.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2700,7 +2699,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "48.0.0" +version = "49.0.0" dependencies = [ "chrono", "console_error_panic_hook", @@ -2730,6 +2729,12 @@ dependencies = [ "serde", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "difflib" version = "0.4.0" @@ -3751,6 +3756,17 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "io-uring" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -4018,7 +4034,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.40", + "clap 4.5.41", "escape8259", ] @@ -4352,9 +4368,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781f96d79ed0f961a7021424ab01840efbda64ae7a505aaea195efc91eaaec4" +checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" dependencies = [ "async-trait", "base64 0.22.1", @@ -4371,7 +4387,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand 0.9.1", + "rand 0.9.2", "reqwest", "ring", "rustls-pemfile", @@ -4523,15 +4539,6 @@ dependencies = [ "syn 2.0.104", ] -[[package]] -name = "parse-zoneinfo" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" -dependencies = [ - "regex", -] - [[package]] name = "paste" version = "1.0.15" @@ -4609,34 +4616,32 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_shared", + "phf_shared 0.11.3", ] [[package]] -name = "phf_codegen" -version = "0.11.3" +name = "phf" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ - "phf_generator", - "phf_shared", + "phf_shared 0.12.1", ] [[package]] -name = "phf_generator" +name = "phf_shared" version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "phf_shared", - "rand 0.8.5", + "siphasher", ] [[package]] name = "phf_shared" -version = "0.11.3" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" dependencies = [ "siphasher", ] @@ -4747,7 +4752,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand 0.9.1", + "rand 0.9.2", "sha2", "stringprep", ] @@ -4819,6 +4824,16 @@ dependencies = [ "termtree", ] +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "prettyplease" version = "0.2.32" @@ -5032,9 +5047,9 @@ checksum = 
"5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b" dependencies = [ "memchr", "serde", @@ -5069,7 +5084,7 @@ dependencies = [ "bytes", "getrandom 0.3.3", "lru-slab", - "rand 0.9.1", + "rand 0.9.2", "ring", "rustc-hash 2.1.1", "rustls", @@ -5139,9 +5154,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", @@ -5192,7 +5207,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -5765,9 +5780,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" dependencies = [ "itoa", "memchr", @@ -6196,9 +6211,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.35.2" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3ffa3e4ff2b324a57f7aeb3c349656c7b127c3c189520251a648102a92496e" +checksum = "252800745060e7b9ffb7b2badbd8b31cfa4aa2e61af879d0a3bf2a317c20217d" dependencies = [ "libc", "memchr", @@ -6247,7 +6262,7 @@ dependencies = [ "chrono-tz", "datafusion-common", "env_logger", - 
"rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -6435,17 +6450,19 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.45.1" +version = "1.46.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" dependencies = [ "backtrace", "bytes", + "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", + "slab", "socket2", "tokio-macros", "windows-sys 0.52.0", @@ -6477,11 +6494,11 @@ dependencies = [ "log", "parking_lot", "percent-encoding", - "phf", + "phf 0.11.3", "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.9.1", + "rand 0.9.2", "socket2", "tokio", "tokio-util", @@ -7493,6 +7510,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 8124abd013f4b..3fd88ef6cf233 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,9 +75,9 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) -rust-version = "1.82.0" +rust-version = "1.85.1" # Define DataFusion version -version = "48.0.0" +version = "49.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -99,7 +99,6 @@ arrow-flight = { version = "55.2.0", features = [ ] } arrow-ipc = { version = "55.2.0", default-features = false, features = [ "lz4", - "zstd", ] } arrow-ord = { version = "55.2.0", default-features = false } arrow-schema = { version = "55.2.0", default-features = false } @@ -110,50 +109,51 @@ chrono = { version = 
"0.4.41", default-features = false } criterion = "0.5.1" ctor = "0.4.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "48.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "48.0.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "48.0.0" } -datafusion-common = { path = "datafusion/common", version = "48.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "48.0.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "48.0.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "48.0.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "48.0.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "48.0.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "48.0.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "48.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "48.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "48.0.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "48.0.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "48.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "48.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "48.0.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "48.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "48.0.0" } -datafusion-functions-table = { path = "datafusion/functions-table", version = "48.0.0" } -datafusion-functions-window = { path = 
"datafusion/functions-window", version = "48.0.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "48.0.0" } -datafusion-macros = { path = "datafusion/macros", version = "48.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "48.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "48.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "48.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "48.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "48.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "48.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "48.0.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "48.0.0" } -datafusion-session = { path = "datafusion/session", version = "48.0.0" } -datafusion-spark = { path = "datafusion/spark", version = "48.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "48.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "48.0.0" } +datafusion = { path = "datafusion/core", version = "49.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "49.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "49.0.0" } +datafusion-common = { path = "datafusion/common", version = "49.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "49.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "49.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "49.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", 
version = "49.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "49.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "49.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "49.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "49.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "49.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "49.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "49.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "49.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "49.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "49.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "49.0.0" } +datafusion-functions-table = { path = "datafusion/functions-table", version = "49.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "49.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "49.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "49.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "49.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "49.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "49.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "49.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "49.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "49.0.0" } 
+datafusion-proto-common = { path = "datafusion/proto-common", version = "49.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "49.0.0" } +datafusion-session = { path = "datafusion/session", version = "49.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "49.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "49.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "49.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" half = { version = "2.6.0", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } +hex = { version = "0.4.3" } indexmap = "2.10.0" itertools = "0.14" log = "^0.4" -object_store = { version = "0.12.2", default-features = false } +object_store = { version = "0.12.3", default-features = false } parking_lot = "0.12" parquet = { version = "55.2.0", default-features = false, features = [ "arrow", @@ -173,7 +173,9 @@ rstest = "0.25.0" serde_json = "1" sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" -tokio = { version = "1.45", features = ["macros", "rt", "sync"] } +testcontainers = { version = "0.24", features = ["default"] } +testcontainers-modules = { version = "0.12" } +tokio = { version = "1.46", features = ["macros", "rt", "sync"] } url = "2.5.4" [profile.release] diff --git a/README.md b/README.md index c142d8f366b2e..fb7f838a572b6 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ Default features: - `datetime_expressions`: date and time functions such as `to_timestamp` - `encoding_expressions`: `encode` and `decode` functions - `parquet`: support for reading the [Apache Parquet] format +- `parquet_encryption`: support for using [Parquet Modular Encryption] - `regex_expressions`: regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` - `unparser`: enables support to reverse LogicalPlans back into 
SQL @@ -134,6 +135,7 @@ Optional features: [apache avro]: https://avro.apache.org/ [apache parquet]: https://parquet.apache.org/ +[parquet modular encryption]: https://parquet.apache.org/docs/file-format/data-pages/encryption/ ## DataFusion API Evolution and Deprecation Guidelines diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index effce26d1cd2e..8952e456398d0 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -89,32 +89,41 @@ tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), # Extended TPC-H Benchmarks sort_tpch: Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=1) +sort_tpch10: Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=10) topk_tpch: Benchmark of top-k (sorting with limit) queries on TPC-H dataset (SF=1) external_aggr: External aggregation benchmark on TPC-H dataset (SF=1) # ClickBench Benchmarks clickbench_1: ClickBench queries against a single parquet file -clickbench_partitioned: ClickBench queries against a partitioned (100 files) parquet +clickbench_partitioned: ClickBench queries against partitioned (100 files) parquet +clickbench_pushdown: ClickBench queries against partitioned (100 files) parquet w/ filter_pushdown enabled clickbench_extended: ClickBench \"inspired\" queries against a single parquet (DataFusion specific) # H2O.ai Benchmarks (Group By, Join, Window) -h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv -h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv -h2o_big: h2oai benchmark with large dataset (1e9 rows) for groupby, default file format is csv -h2o_small_join: h2oai benchmark with small dataset (1e7 rows) for join, default file format is csv -h2o_medium_join: h2oai benchmark with medium dataset (1e8 rows) for join, default file format is csv -h2o_big_join: h2oai benchmark with large dataset (1e9 rows) for join, default file format is csv 
-h2o_small_window: Extended h2oai benchmark with small dataset (1e7 rows) for window, default file format is csv -h2o_medium_window: Extended h2oai benchmark with medium dataset (1e8 rows) for window, default file format is csv -h2o_big_window: Extended h2oai benchmark with large dataset (1e9 rows) for window, default file format is csv +h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv +h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv +h2o_big: h2oai benchmark with large dataset (1e9 rows) for groupby, default file format is csv +h2o_small_join: h2oai benchmark with small dataset (1e7 rows) for join, default file format is csv +h2o_medium_join: h2oai benchmark with medium dataset (1e8 rows) for join, default file format is csv +h2o_big_join: h2oai benchmark with large dataset (1e9 rows) for join, default file format is csv +h2o_small_window: Extended h2oai benchmark with small dataset (1e7 rows) for window, default file format is csv +h2o_medium_window: Extended h2oai benchmark with medium dataset (1e8 rows) for window, default file format is csv +h2o_big_window: Extended h2oai benchmark with large dataset (1e9 rows) for window, default file format is csv +h2o_small_parquet: h2oai benchmark with small dataset (1e7 rows) for groupby, file format is parquet +h2o_medium_parquet: h2oai benchmark with medium dataset (1e8 rows) for groupby, file format is parquet +h2o_big_parquet: h2oai benchmark with large dataset (1e9 rows) for groupby, file format is parquet +h2o_small_join_parquet: h2oai benchmark with small dataset (1e7 rows) for join, file format is parquet +h2o_medium_join_parquet: h2oai benchmark with medium dataset (1e8 rows) for join, file format is parquet +h2o_big_join_parquet: h2oai benchmark with large dataset (1e9 rows) for join, file format is parquet +h2o_small_window_parquet: Extended h2oai benchmark with small dataset (1e7 rows) for window, file format is 
parquet +h2o_medium_window_parquet: Extended h2oai benchmark with medium dataset (1e8 rows) for window, file format is parquet +h2o_big_window_parquet: Extended h2oai benchmark with large dataset (1e9 rows) for window, file format is parquet # Join Order Benchmark (IMDB) imdb: Join Order Benchmark (JOB) using the IMDB dataset converted to parquet # Micro-Benchmarks (specific operators and features) cancellation: How long cancelling a query takes -parquet: Benchmark of parquet reader's filtering speed -sort: Benchmark of sorting speed ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Supported Configuration (Environment Variables) @@ -208,6 +217,9 @@ main() { clickbench_partitioned) data_clickbench_partitioned ;; + clickbench_pushdown) + data_clickbench_partitioned # same data as clickbench_partitioned + ;; clickbench_extended) data_clickbench_1 ;; @@ -242,6 +254,34 @@ main() { h2o_big_window) data_h2o_join "BIG" "CSV" ;; + h2o_small_parquet) + data_h2o "SMALL" "PARQUET" + ;; + h2o_medium_parquet) + data_h2o "MEDIUM" "PARQUET" + ;; + h2o_big_parquet) + data_h2o "BIG" "PARQUET" + ;; + h2o_small_join_parquet) + data_h2o_join "SMALL" "PARQUET" + ;; + h2o_medium_join_parquet) + data_h2o_join "MEDIUM" "PARQUET" + ;; + h2o_big_join_parquet) + data_h2o_join "BIG" "PARQUET" + ;; + # h2o window benchmark uses the same data as the h2o join + h2o_small_window_parquet) + data_h2o_join "SMALL" "PARQUET" + ;; + h2o_medium_window_parquet) + data_h2o_join "MEDIUM" "PARQUET" + ;; + h2o_big_window_parquet) + data_h2o_join "BIG" "PARQUET" + ;; external_aggr) # same data as for tpch data_tpch "1" @@ -250,6 +290,10 @@ main() { # same data as for tpch data_tpch "1" ;; + sort_tpch10) + # same data as for tpch10 + data_tpch "10" + ;; topk_tpch) # same data as for tpch data_tpch "1" @@ -298,10 +342,9 @@ main() { run_tpch "10" "csv" run_tpch_mem "10" run_cancellation - run_parquet - run_sort run_clickbench_1 run_clickbench_partitioned + run_clickbench_pushdown run_clickbench_extended 
run_h2o "SMALL" "PARQUET" "groupby" run_h2o "MEDIUM" "PARQUET" "groupby" @@ -333,18 +376,15 @@ main() { cancellation) run_cancellation ;; - parquet) - run_parquet - ;; - sort) - run_sort - ;; clickbench_1) run_clickbench_1 ;; clickbench_partitioned) run_clickbench_partitioned ;; + clickbench_pushdown) + run_clickbench_pushdown + ;; clickbench_extended) run_clickbench_extended ;; @@ -378,11 +418,42 @@ main() { h2o_big_window) run_h2o_window "BIG" "CSV" "window" ;; + h2o_small_parquet) + run_h2o "SMALL" "PARQUET" + ;; + h2o_medium_parquet) + run_h2o "MEDIUM" "PARQUET" + ;; + h2o_big_parquet) + run_h2o "BIG" "PARQUET" + ;; + h2o_small_join_parquet) + run_h2o_join "SMALL" "PARQUET" + ;; + h2o_medium_join_parquet) + run_h2o_join "MEDIUM" "PARQUET" + ;; + h2o_big_join_parquet) + run_h2o_join "BIG" "PARQUET" + ;; + # h2o window benchmark uses the same data as the h2o join + h2o_small_window_parquet) + run_h2o_window "SMALL" "PARQUET" + ;; + h2o_medium_window_parquet) + run_h2o_window "MEDIUM" "PARQUET" + ;; + h2o_big_window_parquet) + run_h2o_window "BIG" "PARQUET" + ;; external_aggr) run_external_aggr ;; sort_tpch) - run_sort_tpch + run_sort_tpch "1" + ;; + sort_tpch10) + run_sort_tpch "10" ;; topk_tpch) run_topk_tpch @@ -520,22 +591,6 @@ run_cancellation() { debug_run $CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path "${DATA_DIR}/cancellation" -o "${RESULTS_FILE}" } -# Runs the parquet filter benchmark -run_parquet() { - RESULTS_FILE="${RESULTS_DIR}/parquet.json" - echo "RESULTS_FILE: ${RESULTS_FILE}" - echo "Running parquet filter benchmark..." - debug_run $CARGO_COMMAND --bin parquet -- filter --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}" -} - -# Runs the sort benchmark -run_sort() { - RESULTS_FILE="${RESULTS_DIR}/sort.json" - echo "RESULTS_FILE: ${RESULTS_FILE}" - echo "Running sort benchmark..." 
- debug_run $CARGO_COMMAND --bin parquet -- sort --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}" -} - # Downloads the single file hits.parquet ClickBench datasets from # https://github.com/ClickHouse/ClickBench/tree/main#data-loading @@ -590,7 +645,7 @@ run_clickbench_1() { debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits.parquet" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries" -o "${RESULTS_FILE}" ${QUERY_ARG} } - # Runs the clickbench benchmark with the partitioned parquet files + # Runs the clickbench benchmark with the partitioned parquet dataset (100 files) run_clickbench_partitioned() { RESULTS_FILE="${RESULTS_DIR}/clickbench_partitioned.json" echo "RESULTS_FILE: ${RESULTS_FILE}" @@ -598,6 +653,16 @@ run_clickbench_partitioned() { debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits_partitioned" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries" -o "${RESULTS_FILE}" ${QUERY_ARG} } + + # Runs the clickbench benchmark with the partitioned parquet files and filter_pushdown enabled +run_clickbench_pushdown() { + RESULTS_FILE="${RESULTS_DIR}/clickbench_pushdown.json" + echo "RESULTS_FILE: ${RESULTS_FILE}" + echo "Running clickbench (partitioned, 100 files) benchmark with pushdown_filters=true, reorder_filters=true..." 
+ debug_run $CARGO_COMMAND --bin dfbench -- clickbench --pushdown --iterations 5 --path "${DATA_DIR}/hits_partitioned" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries" -o "${RESULTS_FILE}" ${QUERY_ARG} +} + + # Runs the clickbench "extended" benchmark with a single large parquet file run_clickbench_extended() { RESULTS_FILE="${RESULTS_DIR}/clickbench_extended.json" @@ -997,8 +1062,13 @@ run_external_aggr() { # Runs the sort integration benchmark run_sort_tpch() { - TPCH_DIR="${DATA_DIR}/tpch_sf1" - RESULTS_FILE="${RESULTS_DIR}/sort_tpch.json" + SCALE_FACTOR=$1 + if [ -z "$SCALE_FACTOR" ] ; then + echo "Internal error: Scale factor not specified" + exit 1 + fi + TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}" + RESULTS_FILE="${RESULTS_DIR}/sort_tpch${SCALE_FACTOR}.json" echo "RESULTS_FILE: ${RESULTS_FILE}" echo "Running sort tpch benchmark..." diff --git a/benchmarks/queries/clickbench/extended/q0.sql b/benchmarks/queries/clickbench/extended/q0.sql index a1e55b5b25ac4..cb826e5f947e9 100644 --- a/benchmarks/queries/clickbench/extended/q0.sql +++ b/benchmarks/queries/clickbench/extended/q0.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; diff --git a/benchmarks/queries/clickbench/extended/q1.sql b/benchmarks/queries/clickbench/extended/q1.sql index 84fac921c8cbb..7862423787d85 100644 --- a/benchmarks/queries/clickbench/extended/q1.sql +++ b/benchmarks/queries/clickbench/extended/q1.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; diff --git a/benchmarks/queries/clickbench/extended/q2.sql b/benchmarks/queries/clickbench/extended/q2.sql index 9832ce44d4cb6..de2be79885792 100644 --- a/benchmarks/queries/clickbench/extended/q2.sql +++ b/benchmarks/queries/clickbench/extended/q2.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/extended/q3.sql b/benchmarks/queries/clickbench/extended/q3.sql index d1661bc216e5c..f52990b9843a5 100644 --- a/benchmarks/queries/clickbench/extended/q3.sql +++ b/benchmarks/queries/clickbench/extended/q3.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/extended/q4.sql b/benchmarks/queries/clickbench/extended/q4.sql index bd54956a2bcde..5865129db6425 100644 --- a/benchmarks/queries/clickbench/extended/q4.sql +++ b/benchmarks/queries/clickbench/extended/q4.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, MEDIAN("ResponseStartTiming") tmed, MAX("ResponseStartTiming") tmax FROM hits WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tmed DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/extended/q5.sql b/benchmarks/queries/clickbench/extended/q5.sql index 9de2f517d09be..18d3e01c82c4b 100644 --- a/benchmarks/queries/clickbench/extended/q5.sql +++ b/benchmarks/queries/clickbench/extended/q5.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, APPROX_PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "ResponseStartTiming") tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/extended/q6.sql b/benchmarks/queries/clickbench/extended/q6.sql index 091e8867c7ef6..0a6467b8898aa 100644 --- a/benchmarks/queries/clickbench/extended/q6.sql +++ b/benchmarks/queries/clickbench/extended/q6.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND "MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND "SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 1920 AND levenshtein(CAST("UTMSource" AS STRING), CAST("UTMCampaign" AS STRING)) < 3; diff --git a/benchmarks/queries/clickbench/queries/q0.sql b/benchmarks/queries/clickbench/queries/q0.sql index c70aa7a844d75..35f2b32ed4863 100644 --- a/benchmarks/queries/clickbench/queries/q0.sql +++ b/benchmarks/queries/clickbench/queries/q0.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 + +-- set datafusion.execution.parquet.binary_as_string = true SELECT COUNT(*) FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q1.sql b/benchmarks/queries/clickbench/queries/q1.sql index 283a5c3cc82b6..0bee959ec3c7d 100644 --- a/benchmarks/queries/clickbench/queries/q1.sql +++ b/benchmarks/queries/clickbench/queries/q1.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; diff --git a/benchmarks/queries/clickbench/queries/q10.sql b/benchmarks/queries/clickbench/queries/q10.sql index dd44e5c493681..0f9114803fecf 100644 --- a/benchmarks/queries/clickbench/queries/q10.sql +++ b/benchmarks/queries/clickbench/queries/q10.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q11.sql b/benchmarks/queries/clickbench/queries/q11.sql index 9349d450699c4..bed8bb210e130 100644 --- a/benchmarks/queries/clickbench/queries/q11.sql +++ b/benchmarks/queries/clickbench/queries/q11.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q12.sql b/benchmarks/queries/clickbench/queries/q12.sql index 908af63149889..8cf09c0049f3d 100644 --- a/benchmarks/queries/clickbench/queries/q12.sql +++ b/benchmarks/queries/clickbench/queries/q12.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q13.sql b/benchmarks/queries/clickbench/queries/q13.sql index 46e1e6b4a74dc..ef6583c8d1886 100644 --- a/benchmarks/queries/clickbench/queries/q13.sql +++ b/benchmarks/queries/clickbench/queries/q13.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q14.sql b/benchmarks/queries/clickbench/queries/q14.sql index d6c5118168f01..dd267146edec5 100644 --- a/benchmarks/queries/clickbench/queries/q14.sql +++ b/benchmarks/queries/clickbench/queries/q14.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q15.sql b/benchmarks/queries/clickbench/queries/q15.sql index f5b4e511a8865..721d924cb9b95 100644 --- a/benchmarks/queries/clickbench/queries/q15.sql +++ b/benchmarks/queries/clickbench/queries/q15.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q16.sql b/benchmarks/queries/clickbench/queries/q16.sql index 38e44b6849418..389725d58d7a3 100644 --- a/benchmarks/queries/clickbench/queries/q16.sql +++ b/benchmarks/queries/clickbench/queries/q16.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q17.sql b/benchmarks/queries/clickbench/queries/q17.sql index 1a97cdd36a245..be9976a01d7a4 100644 --- a/benchmarks/queries/clickbench/queries/q17.sql +++ b/benchmarks/queries/clickbench/queries/q17.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q18.sql b/benchmarks/queries/clickbench/queries/q18.sql index 5aeeedf78ee0d..d649f1edfe2a4 100644 --- a/benchmarks/queries/clickbench/queries/q18.sql +++ b/benchmarks/queries/clickbench/queries/q18.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q19.sql b/benchmarks/queries/clickbench/queries/q19.sql index e388497dd1ec3..8212a765730a3 100644 --- a/benchmarks/queries/clickbench/queries/q19.sql +++ b/benchmarks/queries/clickbench/queries/q19.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; diff --git a/benchmarks/queries/clickbench/queries/q2.sql b/benchmarks/queries/clickbench/queries/q2.sql index 9938e3081dd2f..bcdfad84ec10f 100644 --- a/benchmarks/queries/clickbench/queries/q2.sql +++ b/benchmarks/queries/clickbench/queries/q2.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q20.sql b/benchmarks/queries/clickbench/queries/q20.sql index a7e6995c1f1bb..a7e488c2abcd8 100644 --- a/benchmarks/queries/clickbench/queries/q20.sql +++ b/benchmarks/queries/clickbench/queries/q20.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; diff --git a/benchmarks/queries/clickbench/queries/q21.sql b/benchmarks/queries/clickbench/queries/q21.sql index d857899d136c3..3551689728ede 100644 --- a/benchmarks/queries/clickbench/queries/q21.sql +++ b/benchmarks/queries/clickbench/queries/q21.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q22.sql b/benchmarks/queries/clickbench/queries/q22.sql index 8ac4f099c4848..d5f696e75a8c8 100644 --- a/benchmarks/queries/clickbench/queries/q22.sql +++ b/benchmarks/queries/clickbench/queries/q22.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q23.sql b/benchmarks/queries/clickbench/queries/q23.sql index 3623b0fed8062..ff399ded6ed8c 100644 --- a/benchmarks/queries/clickbench/queries/q23.sql +++ b/benchmarks/queries/clickbench/queries/q23.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q24.sql b/benchmarks/queries/clickbench/queries/q24.sql index cee774aafe537..bc7a364151e23 100644 --- a/benchmarks/queries/clickbench/queries/q24.sql +++ b/benchmarks/queries/clickbench/queries/q24.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q25.sql b/benchmarks/queries/clickbench/queries/q25.sql index 048b4cd9d3e27..5332e3451aeaf 100644 --- a/benchmarks/queries/clickbench/queries/q25.sql +++ b/benchmarks/queries/clickbench/queries/q25.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q26.sql b/benchmarks/queries/clickbench/queries/q26.sql index 104e8d50ecb0b..bc1108aea1255 100644 --- a/benchmarks/queries/clickbench/queries/q26.sql +++ b/benchmarks/queries/clickbench/queries/q26.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q27.sql b/benchmarks/queries/clickbench/queries/q27.sql index c84cad9296e03..ba234d34f8877 100644 --- a/benchmarks/queries/clickbench/queries/q27.sql +++ b/benchmarks/queries/clickbench/queries/q27.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; diff --git a/benchmarks/queries/clickbench/queries/q28.sql b/benchmarks/queries/clickbench/queries/q28.sql index 8c5a51877f329..6a3bd037bece7 100644 --- a/benchmarks/queries/clickbench/queries/q28.sql +++ b/benchmarks/queries/clickbench/queries/q28.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; diff --git a/benchmarks/queries/clickbench/queries/q29.sql b/benchmarks/queries/clickbench/queries/q29.sql index bfff2509062d0..bca1eb7bbe54b 100644 --- a/benchmarks/queries/clickbench/queries/q29.sql +++ b/benchmarks/queries/clickbench/queries/q29.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), 
SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q3.sql b/benchmarks/queries/clickbench/queries/q3.sql index db818fa013efe..09cdaca713047 100644 --- a/benchmarks/queries/clickbench/queries/q3.sql +++ b/benchmarks/queries/clickbench/queries/q3.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT AVG("UserID") FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q30.sql b/benchmarks/queries/clickbench/queries/q30.sql index 8b4bf19b7f9c7..c0d657927478e 100644 --- a/benchmarks/queries/clickbench/queries/q30.sql +++ b/benchmarks/queries/clickbench/queries/q30.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q31.sql b/benchmarks/queries/clickbench/queries/q31.sql index 5ab49a38b8041..76ab3622ffb57 100644 --- a/benchmarks/queries/clickbench/queries/q31.sql +++ b/benchmarks/queries/clickbench/queries/q31.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q32.sql b/benchmarks/queries/clickbench/queries/q32.sql index d00bc12405edb..88f1e4ce42d23 100644 --- a/benchmarks/queries/clickbench/queries/q32.sql +++ b/benchmarks/queries/clickbench/queries/q32.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q33.sql b/benchmarks/queries/clickbench/queries/q33.sql index 45d491d1c30b8..3740503bbc0e9 100644 --- a/benchmarks/queries/clickbench/queries/q33.sql +++ b/benchmarks/queries/clickbench/queries/q33.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q34.sql b/benchmarks/queries/clickbench/queries/q34.sql index 7e878804de065..fdb7edbb656ac 100644 --- a/benchmarks/queries/clickbench/queries/q34.sql +++ b/benchmarks/queries/clickbench/queries/q34.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q35.sql b/benchmarks/queries/clickbench/queries/q35.sql index c03da84fb19e1..de7e2256eb551 100644 --- a/benchmarks/queries/clickbench/queries/q35.sql +++ b/benchmarks/queries/clickbench/queries/q35.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q36.sql b/benchmarks/queries/clickbench/queries/q36.sql index b76dce5cab9eb..81b1199b0381e 100644 --- a/benchmarks/queries/clickbench/queries/q36.sql +++ b/benchmarks/queries/clickbench/queries/q36.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q37.sql b/benchmarks/queries/clickbench/queries/q37.sql index 49017e3a5f1d1..fa4b85ffbd9cb 100644 --- a/benchmarks/queries/clickbench/queries/q37.sql +++ b/benchmarks/queries/clickbench/queries/q37.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q38.sql b/benchmarks/queries/clickbench/queries/q38.sql index b0cb6814bd855..18fafab6c888f 100644 --- a/benchmarks/queries/clickbench/queries/q38.sql +++ b/benchmarks/queries/clickbench/queries/q38.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; diff --git a/benchmarks/queries/clickbench/queries/q39.sql b/benchmarks/queries/clickbench/queries/q39.sql index 8327eb9bd5724..306f0caacff64 100644 --- a/benchmarks/queries/clickbench/queries/q39.sql +++ b/benchmarks/queries/clickbench/queries/q39.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; diff --git a/benchmarks/queries/clickbench/queries/q4.sql b/benchmarks/queries/clickbench/queries/q4.sql index 027310ad75266..d89ca78c2fb6f 100644 --- a/benchmarks/queries/clickbench/queries/q4.sql +++ b/benchmarks/queries/clickbench/queries/q4.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(DISTINCT "UserID") FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q40.sql b/benchmarks/queries/clickbench/queries/q40.sql index d30d7c4142713..e9d27f5985fa9 100644 --- a/benchmarks/queries/clickbench/queries/q40.sql +++ b/benchmarks/queries/clickbench/queries/q40.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100; diff --git a/benchmarks/queries/clickbench/queries/q41.sql b/benchmarks/queries/clickbench/queries/q41.sql index 0e9a51a7f54c7..0e067e2dfc9da 100644 --- a/benchmarks/queries/clickbench/queries/q41.sql +++ b/benchmarks/queries/clickbench/queries/q41.sql @@ -1 +1,3 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; diff --git a/benchmarks/queries/clickbench/queries/q42.sql b/benchmarks/queries/clickbench/queries/q42.sql index dcad5daa1b67f..111cc1d3c4a9d 100644 --- a/benchmarks/queries/clickbench/queries/q42.sql +++ b/benchmarks/queries/clickbench/queries/q42.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; diff --git a/benchmarks/queries/clickbench/queries/q5.sql b/benchmarks/queries/clickbench/queries/q5.sql index 35b17097d87c8..d371cfb6b3557 100644 --- a/benchmarks/queries/clickbench/queries/q5.sql +++ b/benchmarks/queries/clickbench/queries/q5.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q6.sql b/benchmarks/queries/clickbench/queries/q6.sql index 684103643652f..5b4e896a1df26 100644 --- a/benchmarks/queries/clickbench/queries/q6.sql +++ b/benchmarks/queries/clickbench/queries/q6.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT MIN("EventDate"), MAX("EventDate") FROM hits; diff --git a/benchmarks/queries/clickbench/queries/q7.sql b/benchmarks/queries/clickbench/queries/q7.sql index ab8528c1b1412..afffcb1306d54 100644 --- a/benchmarks/queries/clickbench/queries/q7.sql +++ b/benchmarks/queries/clickbench/queries/q7.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; diff --git a/benchmarks/queries/clickbench/queries/q8.sql b/benchmarks/queries/clickbench/queries/q8.sql index e5691bb66f81a..097880a9da5ed 100644 --- a/benchmarks/queries/clickbench/queries/q8.sql +++ b/benchmarks/queries/clickbench/queries/q8.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. 
See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; diff --git a/benchmarks/queries/clickbench/queries/q9.sql b/benchmarks/queries/clickbench/queries/q9.sql index 42c22d96852da..cb1b79bf5bdc1 100644 --- a/benchmarks/queries/clickbench/queries/q9.sql +++ b/benchmarks/queries/clickbench/queries/q9.sql @@ -1 +1,4 @@ +-- Must set for ClickBench hits_partitioned dataset. See https://github.com/apache/datafusion/issues/16591 +-- set datafusion.execution.parquet.binary_as_string = true + SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; diff --git a/benchmarks/src/bin/dfbench.rs b/benchmarks/src/bin/dfbench.rs index 41b64063c099c..e92fd115c7d87 100644 --- a/benchmarks/src/bin/dfbench.rs +++ b/benchmarks/src/bin/dfbench.rs @@ -33,9 +33,7 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; -use datafusion_benchmarks::{ - cancellation, clickbench, h2o, imdb, parquet_filter, sort, sort_tpch, tpch, -}; +use datafusion_benchmarks::{cancellation, clickbench, h2o, imdb, sort_tpch, tpch}; #[derive(Debug, StructOpt)] #[structopt(about = "benchmark command")] @@ -44,8 +42,6 @@ enum Options { Clickbench(clickbench::RunOpt), H2o(h2o::RunOpt), Imdb(imdb::RunOpt), - ParquetFilter(parquet_filter::RunOpt), - Sort(sort::RunOpt), SortTpch(sort_tpch::RunOpt), Tpch(tpch::RunOpt), TpchConvert(tpch::ConvertOpt), @@ -61,8 +57,6 @@ pub async fn main() -> Result<()> { Options::Clickbench(opt) => opt.run().await, Options::H2o(opt) => opt.run().await, Options::Imdb(opt) => Box::pin(opt.run()).await, - Options::ParquetFilter(opt) => opt.run().await, - Options::Sort(opt) => opt.run().await, Options::SortTpch(opt) => opt.run().await, 
Options::Tpch(opt) => Box::pin(opt.run()).await, Options::TpchConvert(opt) => opt.run().await, diff --git a/benchmarks/src/bin/parquet.rs b/benchmarks/src/bin/parquet.rs deleted file mode 100644 index 6351a71a7bd3f..0000000000000 --- a/benchmarks/src/bin/parquet.rs +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::common::Result; - -use datafusion_benchmarks::{parquet_filter, sort}; -use structopt::StructOpt; - -#[cfg(feature = "snmalloc")] -#[global_allocator] -static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; - -#[derive(Debug, Clone, StructOpt)] -#[structopt(name = "Benchmarks", about = "Apache DataFusion Rust Benchmarks.")] -enum ParquetBenchCmd { - /// Benchmark sorting parquet files - Sort(sort::RunOpt), - /// Benchmark parquet filter pushdown - Filter(parquet_filter::RunOpt), -} - -#[tokio::main] -async fn main() -> Result<()> { - let cmd = ParquetBenchCmd::from_args(); - match cmd { - ParquetBenchCmd::Filter(opt) => { - println!("running filter benchmarks"); - opt.run().await - } - ParquetBenchCmd::Sort(opt) => { - println!("running sort benchmarks"); - opt.run().await - } - } -} diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 1fa2304f2cefc..d5f6052d9e496 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -29,7 +29,7 @@ use datafusion_common::exec_datafusion_err; use datafusion_common::instant::Instant; use structopt::StructOpt; -/// Run the clickbench benchmark +/// Driver program to run the ClickBench benchmark /// /// The ClickBench[1] benchmarks are widely cited in the industry and /// focus on grouping / aggregation / filtering. This runner uses the @@ -44,6 +44,14 @@ pub struct RunOpt { #[structopt(short, long)] query: Option, + /// If specified, enables Parquet Filter Pushdown. + /// + /// Specifically, it enables: + /// * `pushdown_filters = true` + /// * `reorder_filters = true` + #[structopt(long = "pushdown")] + pushdown: bool, + /// Common options #[structopt(flatten)] common: CommonOpt, @@ -122,6 +130,12 @@ impl RunOpt { // The hits_partitioned dataset specifies string columns // as binary due to how it was written. 
Force it to strings parquet_options.binary_as_string = true; + + // Turn on Parquet filter pushdown if requested + if self.pushdown { + parquet_options.pushdown_filters = true; + parquet_options.reorder_filters = true; + } } let rt_builder = self.common.runtime_env_builder()?; diff --git a/benchmarks/src/h2o.rs b/benchmarks/src/h2o.rs index 009f1708ef983..9d4deaf387283 100644 --- a/benchmarks/src/h2o.rs +++ b/benchmarks/src/h2o.rs @@ -24,7 +24,7 @@ use crate::util::{BenchmarkRun, CommonOpt}; use datafusion::logical_expr::{ExplainFormat, ExplainOption}; use datafusion::{error::Result, prelude::SessionContext}; use datafusion_common::{ - exec_datafusion_err, instant::Instant, internal_err, DataFusionError, + exec_datafusion_err, instant::Instant, internal_err, DataFusionError, TableReference, }; use std::path::{Path, PathBuf}; use structopt::StructOpt; @@ -92,18 +92,18 @@ impl RunOpt { // Register tables depending on which h2o benchmark is being run // (groupby/join/window) if self.queries_path.to_str().unwrap().ends_with("groupby.sql") { - self.register_data(&ctx).await?; + self.register_data("x", self.path.as_os_str().to_str().unwrap(), &ctx) + .await?; } else if self.queries_path.to_str().unwrap().ends_with("join.sql") { let join_paths: Vec<&str> = self.join_paths.split(',').collect(); let table_name: Vec<&str> = vec!["x", "small", "medium", "large"]; for (i, path) in join_paths.iter().enumerate() { - ctx.register_csv(table_name[i], path, Default::default()) - .await?; + self.register_data(table_name[i], path, &ctx).await?; } } else if self.queries_path.to_str().unwrap().ends_with("window.sql") { // Only register the 'large' table in h2o-join dataset let h2o_join_large_path = self.join_paths.split(',').nth(3).unwrap(); - ctx.register_csv("large", h2o_join_large_path, Default::default()) + self.register_data("large", h2o_join_large_path, &ctx) .await?; } else { return internal_err!("Invalid query file path"); @@ -147,39 +147,52 @@ impl RunOpt { Ok(()) } - async fn 
register_data(&self, ctx: &SessionContext) -> Result<()> { + async fn register_data( + &self, + table_ref: impl Into, + table_path: impl AsRef, + ctx: &SessionContext, + ) -> Result<()> { let csv_options = Default::default(); let parquet_options = Default::default(); - let path = self.path.as_os_str().to_str().unwrap(); - - if self.path.extension().map(|s| s == "csv").unwrap_or(false) { - ctx.register_csv("x", path, csv_options) - .await - .map_err(|e| { - DataFusionError::Context( - format!("Registering 'table' as {path}"), - Box::new(e), - ) - }) - .expect("error registering csv"); - } - if self - .path + let table_path_str = table_path.as_ref(); + + let extension = Path::new(table_path_str) .extension() - .map(|s| s == "parquet") - .unwrap_or(false) - { - ctx.register_parquet("x", path, parquet_options) - .await - .map_err(|e| { - DataFusionError::Context( - format!("Registering 'table' as {path}"), - Box::new(e), - ) - }) - .expect("error registering parquet"); + .and_then(|s| s.to_str()) + .unwrap_or(""); + + match extension { + "csv" => { + ctx.register_csv(table_ref, table_path_str, csv_options) + .await + .map_err(|e| { + DataFusionError::Context( + format!("Registering 'table' as {table_path_str}"), + Box::new(e), + ) + }) + .expect("error registering csv"); + } + "parquet" => { + ctx.register_parquet(table_ref, table_path_str, parquet_options) + .await + .map_err(|e| { + DataFusionError::Context( + format!("Registering 'table' as {table_path_str}"), + Box::new(e), + ) + }) + .expect("error registering parquet"); + } + _ => { + return Err(DataFusionError::Plan(format!( + "Unsupported file extension: {extension}", + ))); + } } + Ok(()) } } diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index 0d9bdf536d10a..7c9d03a9d928d 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -51,7 +51,7 @@ type BoolDefaultTrue = bool; /// [2] and [3]. 
/// /// [1]: https://www.vldb.org/pvldb/vol9/p204-leis.pdf -/// [2]: http://homepages.cwi.nl/~boncz/job/imdb.tgz +/// [2]: https://event.cwi.nl/da/job/imdb.tgz /// [3]: https://db.in.tum.de/~leis/qo/job.tgz #[derive(Debug, StructOpt, Clone)] diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs index a402fc1b8ce04..e7657c4078d12 100644 --- a/benchmarks/src/lib.rs +++ b/benchmarks/src/lib.rs @@ -20,8 +20,6 @@ pub mod cancellation; pub mod clickbench; pub mod h2o; pub mod imdb; -pub mod parquet_filter; -pub mod sort; pub mod sort_tpch; pub mod tpch; pub mod util; diff --git a/benchmarks/src/parquet_filter.rs b/benchmarks/src/parquet_filter.rs deleted file mode 100644 index 34103af0ffd21..0000000000000 --- a/benchmarks/src/parquet_filter.rs +++ /dev/null @@ -1,194 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::path::PathBuf; - -use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt}; - -use arrow::util::pretty; -use datafusion::common::Result; -use datafusion::logical_expr::utils::disjunction; -use datafusion::logical_expr::{lit, or, Expr}; -use datafusion::physical_plan::collect; -use datafusion::prelude::{col, SessionContext}; -use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; -use datafusion_common::instant::Instant; - -use structopt::StructOpt; - -/// Test performance of parquet filter pushdown -/// -/// The queries are executed on a synthetic dataset generated during -/// the benchmark execution and designed to simulate web server access -/// logs. -/// -/// Example -/// -/// dfbench parquet-filter --path ./data --scale-factor 1.0 -/// -/// generates the synthetic dataset at `./data/logs.parquet`. The size -/// of the dataset can be controlled through the `size_factor` -/// (with the default value of `1.0` generating a ~1GB parquet file). -/// -/// For each filter we will run the query using different -/// `ParquetScanOption` settings. 
-/// -/// Example output: -/// -/// Running benchmarks with the following options: Opt { debug: false, iterations: 3, partitions: 2, path: "./data", batch_size: 8192, scale_factor: 1.0 } -/// Generated test dataset with 10699521 rows -/// Executing with filter 'request_method = Utf8("GET")' -/// Using scan options ParquetScanOptions { pushdown_filters: false, reorder_predicates: false, enable_page_index: false } -/// Iteration 0 returned 10699521 rows in 1303 ms -/// Iteration 1 returned 10699521 rows in 1288 ms -/// Iteration 2 returned 10699521 rows in 1266 ms -/// Using scan options ParquetScanOptions { pushdown_filters: true, reorder_predicates: true, enable_page_index: true } -/// Iteration 0 returned 1781686 rows in 1970 ms -/// Iteration 1 returned 1781686 rows in 2002 ms -/// Iteration 2 returned 1781686 rows in 1988 ms -/// Using scan options ParquetScanOptions { pushdown_filters: true, reorder_predicates: false, enable_page_index: true } -/// Iteration 0 returned 1781686 rows in 1940 ms -/// Iteration 1 returned 1781686 rows in 1986 ms -/// Iteration 2 returned 1781686 rows in 1947 ms -/// ... 
-#[derive(Debug, StructOpt, Clone)] -#[structopt(verbatim_doc_comment)] -pub struct RunOpt { - /// Common options - #[structopt(flatten)] - common: CommonOpt, - - /// Create data files - #[structopt(flatten)] - access_log: AccessLogOpt, - - /// Path to machine readable output file - #[structopt(parse(from_os_str), short = "o", long = "output")] - output_path: Option, -} - -impl RunOpt { - pub async fn run(self) -> Result<()> { - let test_file = self.access_log.build()?; - - let mut rundata = BenchmarkRun::new(); - let scan_options_matrix = vec![ - ParquetScanOptions { - pushdown_filters: false, - reorder_filters: false, - enable_page_index: false, - }, - ParquetScanOptions { - pushdown_filters: true, - reorder_filters: true, - enable_page_index: true, - }, - ParquetScanOptions { - pushdown_filters: true, - reorder_filters: true, - enable_page_index: false, - }, - ]; - - let filter_matrix = vec![ - ("Selective-ish filter", col("request_method").eq(lit("GET"))), - ( - "Non-selective filter", - col("request_method").not_eq(lit("GET")), - ), - ( - "Basic conjunction", - col("request_method") - .eq(lit("POST")) - .and(col("response_status").eq(lit(503_u16))), - ), - ( - "Nested filters", - col("request_method").eq(lit("POST")).and(or( - col("response_status").eq(lit(503_u16)), - col("response_status").eq(lit(403_u16)), - )), - ), - ( - "Many filters", - disjunction([ - col("request_method").not_eq(lit("GET")), - col("response_status").eq(lit(400_u16)), - col("service").eq(lit("backend")), - ]) - .unwrap(), - ), - ("Filter everything", col("response_status").eq(lit(429_u16))), - ("Filter nothing", col("response_status").gt(lit(0_u16))), - ]; - - for (name, filter_expr) in &filter_matrix { - println!("Executing '{name}' (filter: {filter_expr})"); - for scan_options in &scan_options_matrix { - println!("Using scan options {scan_options:?}"); - rundata.start_new_case(&format!( - "{name}: {}", - parquet_scan_disp(scan_options) - )); - for i in 0..self.common.iterations { - 
let config = self.common.update_config(scan_options.config()); - let ctx = SessionContext::new_with_config(config); - - let (rows, elapsed) = exec_scan( - &ctx, - &test_file, - filter_expr.clone(), - self.common.debug, - ) - .await?; - let ms = elapsed.as_secs_f64() * 1000.0; - println!("Iteration {i} returned {rows} rows in {ms} ms"); - rundata.write_iter(elapsed, rows); - } - } - println!("\n"); - } - rundata.maybe_write_json(self.output_path.as_ref())?; - Ok(()) - } -} - -fn parquet_scan_disp(opts: &ParquetScanOptions) -> String { - format!( - "pushdown_filters={}, reorder_filters={}, page_index={}", - opts.pushdown_filters, opts.reorder_filters, opts.enable_page_index - ) -} - -async fn exec_scan( - ctx: &SessionContext, - test_file: &TestParquetFile, - filter: Expr, - debug: bool, -) -> Result<(usize, std::time::Duration)> { - let start = Instant::now(); - let exec = test_file.create_scan(ctx, Some(filter)).await?; - - let task_ctx = ctx.task_ctx(); - let result = collect(exec, task_ctx).await?; - let elapsed = start.elapsed(); - if debug { - pretty::print_batches(&result)?; - } - let rows = result.iter().map(|b| b.num_rows()).sum(); - Ok((rows, elapsed)) -} diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs deleted file mode 100644 index cbbd3b54ea9eb..0000000000000 --- a/benchmarks/src/sort.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::path::PathBuf; -use std::sync::Arc; - -use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt}; - -use arrow::util::pretty; -use datafusion::common::Result; -use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion::physical_plan::collect; -use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion::test_util::parquet::TestParquetFile; -use datafusion_common::instant::Instant; -use datafusion_common::utils::get_available_parallelism; -use structopt::StructOpt; - -/// Test performance of sorting large datasets -/// -/// This test sorts a a synthetic dataset generated during the -/// benchmark execution, designed to simulate sorting web server -/// access logs. Such sorting is often done during data transformation -/// steps. -/// -/// The tests sort the entire dataset using several different sort -/// orders. 
-/// -/// Example: -/// -/// dfbench sort --path ./data --scale-factor 1.0 -#[derive(Debug, StructOpt, Clone)] -#[structopt(verbatim_doc_comment)] -pub struct RunOpt { - /// Common options - #[structopt(flatten)] - common: CommonOpt, - - /// Create data files - #[structopt(flatten)] - access_log: AccessLogOpt, - - /// Path to machine readable output file - #[structopt(parse(from_os_str), short = "o", long = "output")] - output_path: Option, -} - -impl RunOpt { - pub async fn run(self) -> Result<()> { - let test_file = self.access_log.build()?; - - use datafusion::physical_expr::expressions::col; - let mut rundata = BenchmarkRun::new(); - let schema = test_file.schema(); - let sort_cases = vec![ - ( - "sort utf8", - [PhysicalSortExpr { - expr: col("request_method", &schema)?, - options: Default::default(), - }] - .into(), - ), - ( - "sort int", - [PhysicalSortExpr { - expr: col("response_bytes", &schema)?, - options: Default::default(), - }] - .into(), - ), - ( - "sort decimal", - [PhysicalSortExpr { - expr: col("decimal_price", &schema)?, - options: Default::default(), - }] - .into(), - ), - ( - "sort integer tuple", - [ - PhysicalSortExpr { - expr: col("request_bytes", &schema)?, - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("response_bytes", &schema)?, - options: Default::default(), - }, - ] - .into(), - ), - ( - "sort utf8 tuple", - [ - // sort utf8 tuple - PhysicalSortExpr { - expr: col("service", &schema)?, - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("host", &schema)?, - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("pod", &schema)?, - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("image", &schema)?, - options: Default::default(), - }, - ] - .into(), - ), - ( - "sort mixed tuple", - [ - PhysicalSortExpr { - expr: col("service", &schema)?, - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("request_bytes", &schema)?, - options: Default::default(), - }, - 
PhysicalSortExpr { - expr: col("decimal_price", &schema)?, - options: Default::default(), - }, - ] - .into(), - ), - ]; - for (title, expr) in sort_cases { - println!("Executing '{title}' (sorting by: {expr:?})"); - rundata.start_new_case(title); - for i in 0..self.common.iterations { - let config = SessionConfig::new().with_target_partitions( - self.common - .partitions - .unwrap_or_else(get_available_parallelism), - ); - let ctx = SessionContext::new_with_config(config); - let (rows, elapsed) = - exec_sort(&ctx, &expr, &test_file, self.common.debug).await?; - let ms = elapsed.as_secs_f64() * 1000.0; - println!("Iteration {i} finished in {ms} ms"); - rundata.write_iter(elapsed, rows); - } - println!("\n"); - } - if let Some(path) = &self.output_path { - std::fs::write(path, rundata.to_json())?; - } - Ok(()) - } -} - -async fn exec_sort( - ctx: &SessionContext, - expr: &LexOrdering, - test_file: &TestParquetFile, - debug: bool, -) -> Result<(usize, std::time::Duration)> { - let start = Instant::now(); - let scan = test_file.create_scan(ctx, None).await?; - let exec = Arc::new(SortExec::new(expr.clone(), scan)); - let task_ctx = ctx.task_ctx(); - let result = collect(exec, task_ctx).await?; - let elapsed = start.elapsed(); - if debug { - pretty::print_batches(&result)?; - } - let rows = result.iter().map(|b| b.num_rows()).sum(); - Ok((rows, elapsed)) -} diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index 88960d7c7d16e..4f7e539a046f3 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -53,7 +53,7 @@ type BoolDefaultTrue = bool; /// [2]. 
/// /// [1]: http://www.tpc.org/tpch/ -/// [2]: https://github.com/databricks/tpch-dbgen.git, +/// [2]: https://github.com/databricks/tpch-dbgen.git /// [2.17.1]: https://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.17.1.pdf #[derive(Debug, StructOpt, Clone)] #[structopt(verbatim_doc_comment)] diff --git a/benchmarks/src/util/access_log.rs b/benchmarks/src/util/access_log.rs deleted file mode 100644 index 2b29465ee20e3..0000000000000 --- a/benchmarks/src/util/access_log.rs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Benchmark data generation - -use datafusion::common::Result; -use datafusion::test_util::parquet::TestParquetFile; -use parquet::file::properties::WriterProperties; -use std::path::PathBuf; -use structopt::StructOpt; -use test_utils::AccessLogGenerator; - -// Options and builder for making an access log test file -// Note don't use docstring or else it ends up in help -#[derive(Debug, StructOpt, Clone)] -pub struct AccessLogOpt { - /// Path to folder where access log file will be generated - #[structopt(parse(from_os_str), required = true, short = "p", long = "path")] - path: PathBuf, - - /// Data page size of the generated parquet file - #[structopt(long = "page-size")] - page_size: Option, - - /// Data page size of the generated parquet file - #[structopt(long = "row-group-size")] - row_group_size: Option, - - /// Total size of generated dataset. The default scale factor of 1.0 will generate a roughly 1GB parquet file - #[structopt(long = "scale-factor", default_value = "1.0")] - scale_factor: f32, -} - -impl AccessLogOpt { - /// Create the access log and return the file. - /// - /// See [`TestParquetFile`] for more details - pub fn build(self) -> Result { - let path = self.path.join("logs.parquet"); - - let mut props_builder = WriterProperties::builder(); - - if let Some(s) = self.page_size { - props_builder = props_builder - .set_data_page_size_limit(s) - .set_write_batch_size(s); - } - - if let Some(s) = self.row_group_size { - props_builder = props_builder.set_max_row_group_size(s); - } - let props = props_builder.build(); - - let generator = AccessLogGenerator::new(); - - let num_batches = 100_f32 * self.scale_factor; - - TestParquetFile::try_new(path, props, generator.take(num_batches as usize)) - } -} diff --git a/benchmarks/src/util/mod.rs b/benchmarks/src/util/mod.rs index 420d52401c4e9..eb9a0b8bc9a21 100644 --- a/benchmarks/src/util/mod.rs +++ b/benchmarks/src/util/mod.rs @@ -16,10 +16,8 @@ // under the License. //! 
Shared benchmark utilities -mod access_log; mod options; mod run; -pub use access_log::AccessLogOpt; pub use options::CommonOpt; pub use run::{BenchQuery, BenchmarkRun, QueryResult}; diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 4b464dffc57ce..3e72214f6c226 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -29,47 +29,26 @@ cargo test ## Running Storage Integration Tests -By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and -then provide the necessary configuration for that object store. +By default, storage integration tests are not run. These tests use the `testcontainers` crate to start up a local MinIO server using Docker on port 9000. -For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. - -### AWS - -To test the S3 integration against [Minio](https://github.com/minio/minio) - -First start up a container with Minio and load test files. +To run them you will need to set `TEST_STORAGE_INTEGRATION`: ```shell -docker run -d \ - --name datafusion-test-minio \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin \ - -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/../datafusion/core/tests/data:/source \ - quay.io/minio/minio server /data - -docker exec datafusion-test-minio /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" +TEST_STORAGE_INTEGRATION=1 cargo test ``` -Setup environment +For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used.
-```shell -export TEST_STORAGE_INTEGRATION=1 -export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin -export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword -export AWS_ENDPOINT=http://127.0.0.1:9000 -export AWS_ALLOW_HTTP=true -``` +### AWS -Note that `AWS_ENDPOINT` is set without slash at the end. +S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs). +This requires Docker to be running on your machine and port 9000 to be free. -Run tests +If you see an error mentioning "failed to load IMDS session token" such as -```shell -cargo test -``` +> ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ---- +> Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" }) + +You may need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED` environment variable, for example: + +> $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index ccf8f4b572323..e74b00825845c 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -37,9 +37,9 @@ backtrace = ["datafusion/backtrace"] [dependencies] arrow = { workspace = true } async-trait = { workspace = true } -aws-config = "1.8.1" -aws-credential-types = "1.2.0" -clap = { version = "4.5.40", features = ["derive", "cargo"] } +aws-config = "1.8.3" +aws-credential-types = "1.2.4" +clap = { version = "4.5.41", features = ["derive", "cargo"] } datafusion = { workspace = true, features = [ "avro", "crypto_expressions", @@ -72,3 +72,5 @@ insta = { workspace = true } insta-cmd = "0.6.0" predicates = "3.0" rstest = { workspace = true } +testcontainers = { workspace = true } +testcontainers-modules = { workspace = true, features = ["minio"] } diff --git a/datafusion-cli/src/exec.rs
b/datafusion-cli/src/exec.rs index ce190c1b40d3c..eb7174dbbd6f2 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -214,7 +214,6 @@ pub(super) async fn exec_and_print( print_options: &PrintOptions, sql: String, ) -> Result<()> { - let now = Instant::now(); let task_ctx = ctx.task_ctx(); let options = task_ctx.session_config().options(); let dialect = &options.sql_parser.dialect; @@ -228,25 +227,43 @@ pub(super) async fn exec_and_print( let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?; for statement in statements { - let adjusted = - AdjustedPrintOptions::new(print_options.clone()).with_statement(&statement); + StatementExecutor::new(statement) + .execute(ctx, print_options) + .await?; + } - let plan = create_plan(ctx, statement.clone(), false).await?; - let adjusted = adjusted.with_plan(&plan); + Ok(()) +} - let df = match ctx.execute_logical_plan(plan).await { - Ok(df) => df, - Err(DataFusionError::ObjectStore(Generic { store, source: _ })) - if "S3".eq_ignore_ascii_case(store) - && matches!(&statement, Statement::CreateExternalTable(_)) => - { - warn!("S3 region is incorrect, auto-detecting the correct region (this may be slow). Consider updating your region configuration."); - let plan = create_plan(ctx, statement, true).await?; - ctx.execute_logical_plan(plan).await? 
- } - Err(e) => return Err(e), - }; +/// Executor for SQL statements, including special handling for S3 region detection retry logic +struct StatementExecutor { + statement: Statement, + statement_for_retry: Option, +} + +impl StatementExecutor { + fn new(statement: Statement) -> Self { + let statement_for_retry = matches!(statement, Statement::CreateExternalTable(_)) + .then(|| statement.clone()); + + Self { + statement, + statement_for_retry, + } + } + + async fn execute( + self, + ctx: &dyn CliSessionContext, + print_options: &PrintOptions, + ) -> Result<()> { + let now = Instant::now(); + let (df, adjusted) = self + .create_and_execute_logical_plan(ctx, print_options) + .await?; let physical_plan = df.create_physical_plan().await?; + let task_ctx = ctx.task_ctx(); + let options = task_ctx.session_config().options(); // Track memory usage for the query result if it's bounded let mut reservation = @@ -296,9 +313,38 @@ pub(super) async fn exec_and_print( )?; reservation.free(); } + + Ok(()) } - Ok(()) + async fn create_and_execute_logical_plan( + mut self, + ctx: &dyn CliSessionContext, + print_options: &PrintOptions, + ) -> Result<(datafusion::dataframe::DataFrame, AdjustedPrintOptions)> { + let adjusted = AdjustedPrintOptions::new(print_options.clone()) + .with_statement(&self.statement); + + let plan = create_plan(ctx, self.statement, false).await?; + let adjusted = adjusted.with_plan(&plan); + + let df = match ctx.execute_logical_plan(plan).await { + Ok(df) => Ok(df), + Err(DataFusionError::ObjectStore(err)) + if matches!(err.as_ref(), Generic { store, source: _ } if "S3".eq_ignore_ascii_case(store)) + && self.statement_for_retry.is_some() => + { + warn!("S3 region is incorrect, auto-detecting the correct region (this may be slow). 
Consider updating your region configuration."); + let plan = + create_plan(ctx, self.statement_for_retry.take().unwrap(), true) + .await?; + ctx.execute_logical_plan(plan).await + } + Err(e) => Err(e), + }?; + + Ok((df, adjusted)) + } } /// Track adjustments to the print options based on the plan / statement being executed diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 176dfdd4ceede..de33e11fe0100 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -15,27 +15,35 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::error::Error; -use std::fmt::{Debug, Display}; -use std::sync::Arc; - -use datafusion::common::config::{ - ConfigEntry, ConfigExtension, ConfigField, ExtensionOptions, TableOptions, Visit, -}; -use datafusion::common::{config_err, exec_datafusion_err, exec_err}; -use datafusion::error::{DataFusionError, Result}; -use datafusion::execution::context::SessionState; - use async_trait::async_trait; use aws_config::BehaviorVersion; -use aws_credential_types::provider::error::CredentialsError; -use aws_credential_types::provider::{ProvideCredentials, SharedCredentialsProvider}; +use aws_credential_types::provider::{ + error::CredentialsError, ProvideCredentials, SharedCredentialsProvider, +}; +use datafusion::{ + common::{ + config::ConfigEntry, config::ConfigExtension, config::ConfigField, + config::ExtensionOptions, config::TableOptions, config::Visit, config_err, + exec_datafusion_err, exec_err, + }, + error::{DataFusionError, Result}, + execution::context::SessionState, +}; use log::debug; -use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey, AwsCredential}; -use object_store::gcp::GoogleCloudStorageBuilder; -use object_store::http::HttpBuilder; -use object_store::{ClientOptions, CredentialProvider, ObjectStore}; +use object_store::{ + aws::{AmazonS3Builder, AmazonS3ConfigKey, AwsCredential}, + 
gcp::GoogleCloudStorageBuilder, + http::HttpBuilder, + ClientOptions, CredentialProvider, + Error::Generic, + ObjectStore, +}; +use std::{ + any::Any, + error::Error, + fmt::{Debug, Display}, + sync::Arc, +}; use url::Url; #[cfg(not(test))] @@ -153,10 +161,10 @@ impl CredentialsFromConfig { let credentials = config .credentials_provider() .ok_or_else(|| { - DataFusionError::ObjectStore(object_store::Error::Generic { + DataFusionError::ObjectStore(Box::new(Generic { store: "S3", source: "Failed to get S3 credentials aws_config".into(), - }) + })) })? .clone(); @@ -183,10 +191,10 @@ impl CredentialsFromConfig { "Error getting credentials from provider: {e}{source_message}", ); - return Err(DataFusionError::ObjectStore(object_store::Error::Generic { + return Err(DataFusionError::ObjectStore(Box::new(Generic { store: "S3", source: message.into(), - })); + }))); } }; Ok(Self { @@ -206,12 +214,14 @@ impl CredentialProvider for S3CredentialProvider { type Credential = AwsCredential; async fn get_credential(&self) -> object_store::Result> { - let creds = self.credentials.provide_credentials().await.map_err(|e| { - object_store::Error::Generic { - store: "S3", - source: Box::new(e), - } - })?; + let creds = + self.credentials + .provide_credentials() + .await + .map_err(|e| Generic { + store: "S3", + source: Box::new(e), + })?; Ok(Arc::new(AwsCredential { key_id: creds.access_key_id().to_string(), secret_key: creds.secret_access_key().to_string(), @@ -570,6 +580,9 @@ mod tests { #[tokio::test] async fn s3_object_store_builder_default() -> Result<()> { let location = "s3://bucket/path/FAKE/file.parquet"; + // Set it to a non-existent file to avoid reading the default configuration file + std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); + std::env::set_var("AWS_SHARED_CREDENTIALS_FILE", "data/aws.credentials"); // No options let table_url = ListingTableUrl::parse(location)?; @@ -722,6 +735,8 @@ mod tests { async fn 
s3_object_store_builder_resolves_region_when_none_provided() -> Result<()> { let expected_region = "eu-central-1"; let location = "s3://test-bucket/path/file.parquet"; + // Set it to a non-existent file to avoid reading the default configuration file + std::env::set_var("AWS_CONFIG_FILE", "data/aws.config"); let table_url = ListingTableUrl::parse(location)?; let aws_options = AwsOptions { diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 108651281dfcc..1b937ea2168f0 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -21,7 +21,12 @@ use rstest::rstest; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::path::PathBuf; use std::{env, fs}; +use testcontainers::core::{CmdWaitFor, ExecCommand, Mount}; +use testcontainers::runners::AsyncRunner; +use testcontainers::{ContainerAsync, ImageExt, TestcontainersError}; +use testcontainers_modules::minio; fn cli() -> Command { Command::new(get_cargo_bin("datafusion-cli")) @@ -32,9 +37,87 @@ fn make_settings() -> Settings { settings.set_prepend_module_to_snapshot(false); settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); + settings.add_filter(r"(?s)backtrace:.*?\n\n\n", ""); settings } +async fn setup_minio_container() -> ContainerAsync { + const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin"; + const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword"; + + let data_path = + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data"); + + let absolute_data_path = data_path + .canonicalize() + .expect("Failed to get absolute path for test data"); + + let container = minio::MinIO::default() + .with_env_var("MINIO_ROOT_USER", MINIO_ROOT_USER) + .with_env_var("MINIO_ROOT_PASSWORD", MINIO_ROOT_PASSWORD) + .with_mount(Mount::bind_mount( + absolute_data_path.to_str().unwrap(), + "/source", + )) + .start() + 
.await; + + match container { + Ok(container) => { + // We wait for MinIO to be healthy and prepare test files. We do it via CLI to avoid S3 dependency + let commands = [ + ExecCommand::new(["/usr/bin/mc", "ready", "local"]), + ExecCommand::new([ + "/usr/bin/mc", + "alias", + "set", + "localminio", + "http://localhost:9000", + MINIO_ROOT_USER, + MINIO_ROOT_PASSWORD, + ]), + ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]), + ExecCommand::new([ + "/usr/bin/mc", + "cp", + "-r", + "/source/", + "localminio/data/", + ]), + ]; + + for command in commands { + let command = + command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); + + let cmd_ref = format!("{command:?}"); + + if let Err(e) = container.exec(command).await { + let stdout = container.stdout_to_vec().await.unwrap_or_default(); + let stderr = container.stderr_to_vec().await.unwrap_or_default(); + + panic!( + "Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}", + cmd_ref, + e, + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + ); + } + } + + container + } + + Err(TestcontainersError::Client(e)) => { + panic!("Failed to start MinIO container.
Ensure Docker is running and accessible: {e}"); + } + Err(e) => { + panic!("Failed to start MinIO container: {e}"); + } + } +} + #[cfg(test)] #[ctor::ctor] fn init() { @@ -165,12 +248,22 @@ async fn test_cli() { return; } + let container = setup_minio_container().await; + let settings = make_settings(); let _bound = settings.bind_to_scope(); + let port = container.get_host_port_ipv4(9000).await.unwrap(); + glob!("sql/integration/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!(cli().pass_stdin(input)) + assert_cmd_snapshot!(cli() + .env_clear() + .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") + .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") + .env("AWS_ENDPOINT", format!("http://localhost:{port}")) + .env("AWS_ALLOW_HTTP", "true") + .pass_stdin(input)) }); } @@ -186,20 +279,17 @@ async fn test_aws_options() { let settings = make_settings(); let _bound = settings.bind_to_scope(); - let access_key_id = - env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); - let secret_access_key = - env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); - let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + let container = setup_minio_container().await; + let port = container.get_host_port_ipv4(9000).await.unwrap(); let input = format!( r#"CREATE EXTERNAL TABLE CARS STORED AS CSV LOCATION 's3://data/cars.csv' OPTIONS( - 'aws.access_key_id' '{access_key_id}', - 'aws.secret_access_key' '{secret_access_key}', - 'aws.endpoint' '{endpoint_url}', + 'aws.access_key_id' 'TEST-DataFusionLogin', + 'aws.secret_access_key' 'TEST-DataFusionPassword', + 'aws.endpoint' 'http://localhost:{port}', 'aws.allow_http' 'true' ); diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index b31708a5c1cc7..324d9f61b5b7d 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -52,6 +52,10 @@ path = 
"examples/external_dependency/dataframe-to-s3.rs" name = "query_aws_s3" path = "examples/external_dependency/query-aws-s3.rs" +[[example]] +name = "custom_file_casts" +path = "examples/custom_file_casts.rs" + [dev-dependencies] arrow = { workspace = true } # arrow_schema is required for record_batch! macro :sad: @@ -70,6 +74,7 @@ log = { workspace = true } mimalloc = { version = "0.1", default-features = false } object_store = { workspace = true, features = ["aws", "http"] } prost = { workspace = true } +serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 285762bb57e74..02f83b9bd0d9d 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -50,6 +50,7 @@ cargo run --example dataframe - [`advanced_udf.rs`](examples/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF) - [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) - [`advanced_parquet_index.rs`](examples/advanced_parquet_index.rs): Creates a detailed secondary index that covers the contents of several parquet files +- [`async_udf.rs`](examples/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF) - [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control) - [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog - [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization @@ -65,6 +66,7 @@ cargo run --example dataframe - [`flight_sql_server.rs`](examples/flight/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients 
- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros - [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates +- [`parquet_embedded_index.rs`](examples/parquet_embedded_index.rs): Store a custom index inside a Parquet file and use it to speed up queries - [`parquet_encrypted.rs`](examples/parquet_encrypted.rs): Read and write encrypted Parquet files using DataFusion - [`parquet_index.rs`](examples/parquet_index.rs): Create an secondary index over several parquet files and use it to speed up queries - [`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution diff --git a/datafusion-examples/examples/async_udf.rs b/datafusion-examples/examples/async_udf.rs index 3037a971dfd98..22e759de40f7a 100644 --- a/datafusion-examples/examples/async_udf.rs +++ b/datafusion-examples/examples/async_udf.rs @@ -15,104 +15,103 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ArrayIter, ArrayRef, AsArray, Int64Array, RecordBatch, StringArray}; -use arrow::compute::kernels::cmp::eq; +//! This example shows how to create and use "Async UDFs" in DataFusion. +//! +//! Async UDFs allow you to perform asynchronous operations, such as +//! making network requests. This can be used for tasks like fetching +//! data from an external API such as a LLM service or an external database. 
+ +use arrow::array::{ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use async_trait::async_trait; +use datafusion::assert_batches_eq; +use datafusion::common::cast::as_string_view_array; use datafusion::common::error::Result; -use datafusion::common::types::{logical_int64, logical_string}; +use datafusion::common::not_impl_err; use datafusion::common::utils::take_function_args; -use datafusion::common::{internal_err, not_impl_err}; -use datafusion::config::ConfigOptions; +use datafusion::execution::SessionStateBuilder; use datafusion::logical_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl}; use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, - TypeSignatureClass, Volatility, + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; -use datafusion::logical_expr_common::signature::Coercion; -use datafusion::physical_expr_common::datum::apply_cmp; -use datafusion::prelude::SessionContext; -use log::trace; +use datafusion::prelude::{SessionConfig, SessionContext}; use std::any::Any; use std::sync::Arc; #[tokio::main] async fn main() -> Result<()> { - let ctx: SessionContext = SessionContext::new(); - - let async_upper = AsyncUpper::new(); - let udf = AsyncScalarUDF::new(Arc::new(async_upper)); - ctx.register_udf(udf.into_scalar_udf()); - let async_equal = AsyncEqual::new(); + // Use a hard coded parallelism level of 4 so the explain plan + // is consistent across machines. + let config = SessionConfig::new().with_target_partitions(4); + let ctx = + SessionContext::from(SessionStateBuilder::new().with_config(config).build()); + + // Similarly to regular UDFs, you create an AsyncScalarUDF by implementing + // `AsyncScalarUDFImpl` and creating an instance of `AsyncScalarUDF`. 
+ let async_equal = AskLLM::new(); let udf = AsyncScalarUDF::new(Arc::new(async_equal)); + + // Async UDFs are registered with the SessionContext, using the same + // `register_udf` method as regular UDFs. ctx.register_udf(udf.into_scalar_udf()); - ctx.register_batch("animal", animal()?)?; - // use Async UDF in the projection - // +---------------+----------------------------------------------------------------------------------------+ - // | plan_type | plan | - // +---------------+----------------------------------------------------------------------------------------+ - // | logical_plan | Projection: async_equal(a.id, Int64(1)) | - // | | SubqueryAlias: a | - // | | TableScan: animal projection=[id] | - // | physical_plan | ProjectionExec: expr=[__async_fn_0@1 as async_equal(a.id,Int64(1))] | - // | | AsyncFuncExec: async_expr=[async_expr(name=__async_fn_0, expr=async_equal(id@0, 1))] | - // | | CoalesceBatchesExec: target_batch_size=8192 | - // | | DataSourceExec: partitions=1, partition_sizes=[1] | - // | | | - // +---------------+----------------------------------------------------------------------------------------+ - ctx.sql("explain select async_equal(a.id, 1) from animal a") - .await? - .show() - .await?; + // Create a table named 'animal' with some sample data + ctx.register_batch("animal", animal()?)?; - // +----------------------------+ - // | async_equal(a.id,Int64(1)) | - // +----------------------------+ - // | true | - // | false | - // | false | - // | false | - // | false | - // +----------------------------+ - ctx.sql("select async_equal(a.id, 1) from animal a") + // You can use the async UDF as normal in SQL queries + // + // Note: Async UDFs can currently be used in the select list and filter conditions. + let results = ctx + .sql("select * from animal a where ask_llm(a.name, 'Is this animal furry?')") .await? 
- .show() + .collect() .await?; - // use Async UDF in the filter - // +---------------+--------------------------------------------------------------------------------------------+ - // | plan_type | plan | - // +---------------+--------------------------------------------------------------------------------------------+ - // | logical_plan | SubqueryAlias: a | - // | | Filter: async_equal(animal.id, Int64(1)) | - // | | TableScan: animal projection=[id, name] | - // | physical_plan | CoalesceBatchesExec: target_batch_size=8192 | - // | | FilterExec: __async_fn_0@2, projection=[id@0, name@1] | - // | | RepartitionExec: partitioning=RoundRobinBatch(12), input_partitions=1 | - // | | AsyncFuncExec: async_expr=[async_expr(name=__async_fn_0, expr=async_equal(id@0, 1))] | - // | | CoalesceBatchesExec: target_batch_size=8192 | - // | | DataSourceExec: partitions=1, partition_sizes=[1] | - // | | | - // +---------------+--------------------------------------------------------------------------------------------+ - ctx.sql("explain select * from animal a where async_equal(a.id, 1)") + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 1 | cat |", + "| 2 | dog |", + "+----+------+", + ], + &results + ); + + // While the interface is the same for both normal and async UDFs, you can + // use `EXPLAIN` output to see that the async UDF uses a special + // `AsyncFuncExec` node in the physical plan: + let results = ctx + .sql("explain select * from animal a where ask_llm(a.name, 'Is this animal furry?')") .await? - .show() + .collect() .await?; - // +----+------+ - // | id | name | - // +----+------+ - // | 1 | cat | - // +----+------+ - ctx.sql("select * from animal a where async_equal(a.id, 1)") - .await? 
- .show() - .await?; + assert_batches_eq!( + [ + "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | SubqueryAlias: a |", + "| | Filter: ask_llm(CAST(animal.name AS Utf8View), Utf8View(\"Is this animal furry?\")) |", + "| | TableScan: animal projection=[id, name] |", + "| physical_plan | CoalesceBatchesExec: target_batch_size=8192 |", + "| | FilterExec: __async_fn_0@2, projection=[id@0, name@1] |", + "| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |", + "| | AsyncFuncExec: async_expr=[async_expr(name=__async_fn_0, expr=ask_llm(CAST(name@1 AS Utf8View), Is this animal furry?))] |", + "| | CoalesceBatchesExec: target_batch_size=8192 |", + "| | DataSourceExec: partitions=1, partition_sizes=[1] |", + "| | |", + "+---------------+--------------------------------------------------------------------------------------------------------------------------------+", + ], + &results + ); Ok(()) } +/// Returns a sample `RecordBatch` representing an "animal" table with two columns: fn animal() -> Result { let schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int64, false), @@ -127,118 +126,45 @@ fn animal() -> Result { Ok(RecordBatch::try_new(schema, vec![id_array, name_array])?) } +/// An async UDF that simulates asking a large language model (LLM) service a +/// question based on the content of two columns. The UDF will return a boolean +/// indicating whether the LLM thinks the first argument matches the question in +/// the second argument. +/// +/// Since this is a simplified example, it does not call an LLM service, but +/// could be extended to do so in a real-world scenario. 
#[derive(Debug)] -pub struct AsyncUpper { +struct AskLLM { signature: Signature, } -impl Default for AsyncUpper { +impl Default for AskLLM { fn default() -> Self { Self::new() } } -impl AsyncUpper { +impl AskLLM { pub fn new() -> Self { Self { - signature: Signature::new( - TypeSignature::Coercible(vec![Coercion::Exact { - desired_type: TypeSignatureClass::Native(logical_string()), - }]), + signature: Signature::exact( + vec![DataType::Utf8View, DataType::Utf8View], Volatility::Volatile, ), } } } -#[async_trait] -impl ScalarUDFImpl for AsyncUpper { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "async_upper" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Utf8) - } - - fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { - not_impl_err!("AsyncUpper can only be called from async contexts") - } -} - -#[async_trait] -impl AsyncScalarUDFImpl for AsyncUpper { - fn ideal_batch_size(&self) -> Option { - Some(10) - } - - async fn invoke_async_with_args( - &self, - args: ScalarFunctionArgs, - _option: &ConfigOptions, - ) -> Result { - trace!("Invoking async_upper with args: {:?}", args); - let value = &args.args[0]; - let result = match value { - ColumnarValue::Array(array) => { - let string_array = array.as_string::(); - let iter = ArrayIter::new(string_array); - let result = iter - .map(|string| string.map(|s| s.to_uppercase())) - .collect::(); - Arc::new(result) as ArrayRef - } - _ => return internal_err!("Expected a string argument, got {:?}", value), - }; - Ok(result) - } -} - -#[derive(Debug)] -struct AsyncEqual { - signature: Signature, -} - -impl Default for AsyncEqual { - fn default() -> Self { - Self::new() - } -} - -impl AsyncEqual { - pub fn new() -> Self { - Self { - signature: Signature::new( - TypeSignature::Coercible(vec![ - Coercion::Exact { - desired_type: TypeSignatureClass::Native(logical_int64()), - }, - 
Coercion::Exact { - desired_type: TypeSignatureClass::Native(logical_int64()), - }, - ]), - Volatility::Volatile, - ), - } - } -} - -#[async_trait] -impl ScalarUDFImpl for AsyncEqual { +/// All async UDFs implement the `ScalarUDFImpl` trait, which provides the basic +/// information for the function, such as its name, signature, and return type. +/// [async_trait] +impl ScalarUDFImpl for AskLLM { fn as_any(&self) -> &dyn Any { self } fn name(&self) -> &str { - "async_equal" + "ask_llm" } fn signature(&self) -> &Signature { @@ -249,19 +175,60 @@ impl ScalarUDFImpl for AsyncEqual { Ok(DataType::Boolean) } + /// Since this is an async UDF, the `invoke_with_args` method will not be + /// called directly. fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { - not_impl_err!("AsyncEqual can only be called from async contexts") + not_impl_err!("AskLLM can only be called from async contexts") } } +/// In addition to [`ScalarUDFImpl`], we also need to implement the +/// [`AsyncScalarUDFImpl`] trait. #[async_trait] -impl AsyncScalarUDFImpl for AsyncEqual { - async fn invoke_async_with_args( - &self, - args: ScalarFunctionArgs, - _option: &ConfigOptions, - ) -> Result { - let [arg1, arg2] = take_function_args(self.name(), &args.args)?; - apply_cmp(arg1, arg2, eq)?.to_array(args.number_rows) +impl AsyncScalarUDFImpl for AskLLM { + /// The `invoke_async_with_args` method is similar to `invoke_with_args`, + /// but it returns a `Future` that resolves to the result. + /// + /// Since this signature is `async`, it can do any `async` operations, such + /// as network requests. This method is run on the same tokio `Runtime` that + /// is processing the query, so you may wish to make actual network requests + /// on a different `Runtime`, as explained in the `thread_pools.rs` example + /// in this directory. 
+ async fn invoke_async_with_args(&self, args: ScalarFunctionArgs) -> Result { + // in a real UDF you would likely want to special case constant + // arguments to improve performance, but this example converts the + // arguments to arrays for simplicity. + let args = ColumnarValue::values_to_arrays(&args.args)?; + let [content_column, question_column] = take_function_args(self.name(), args)?; + + // In a real function, you would use a library such as `reqwest` here to + // make an async HTTP request. Credentials and other configurations can + // be supplied via the `ConfigOptions` parameter. + + // In this example, we will simulate the LLM response by comparing the two + // input arguments using some static strings + let content_column = as_string_view_array(&content_column)?; + let question_column = as_string_view_array(&question_column)?; + + let result_array: BooleanArray = content_column + .iter() + .zip(question_column.iter()) + .map(|(a, b)| { + // If either value is null, return None + let a = a?; + let b = b?; + // Simulate an LLM response by checking the arguments to some + // hardcoded conditions. + if a.contains("cat") && b.contains("furry") + || a.contains("dog") && b.contains("furry") + { + Some(true) + } else { + Some(false) + } + }) + .collect(); + + Ok(Arc::new(result_array)) } } diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_file_casts.rs new file mode 100644 index 0000000000000..847aa8ad7f52a --- /dev/null +++ b/datafusion-examples/examples/custom_file_casts.rs @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{record_batch, RecordBatch}; +use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; + +use datafusion::assert_batches_eq; +use datafusion::common::not_impl_err; +use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion::common::{Result, ScalarValue}; +use datafusion::datasource::listing::{ + ListingTable, ListingTableConfig, ListingTableUrl, +}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::parquet::arrow::ArrowWriter; +use datafusion::physical_expr::expressions::CastExpr; +use datafusion::physical_expr::schema_rewriter::{ + DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, +}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::prelude::SessionConfig; +use object_store::memory::InMemory; +use object_store::path::Path; +use object_store::{ObjectStore, PutPayload}; + +// Example showing how to implement custom casting rules to adapt file schemas. +// This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error +// before even reading the data. +// Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast.
+ +#[tokio::main] +async fn main() -> Result<()> { + println!("=== Creating example data ==="); + + // Create a logical / table schema with an Int32 column + let logical_schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + + // Create some data that can be cast (Int16 -> Int32 is widening) and some that cannot (Int64 -> Int32 is narrowing) + let store = Arc::new(InMemory::new()) as Arc; + let path = Path::from("good.parquet"); + let batch = record_batch!(("id", Int16, [1, 2, 3]))?; + write_data(&store, &path, &batch).await?; + let path = Path::from("bad.parquet"); + let batch = record_batch!(("id", Int64, [1, 2, 3]))?; + write_data(&store, &path, &batch).await?; + + // Set up query execution + let mut cfg = SessionConfig::new(); + // Turn on filter pushdown so that the PhysicalExprAdapter is used + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.runtime_env() + .register_object_store(ObjectStoreUrl::parse("memory://")?.as_ref(), store); + + // Register our good and bad files via ListingTable + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///good.parquet")?) + .infer_options(&ctx.state()) + .await? + .with_schema(Arc::clone(&logical_schema)) + .with_expr_adapter_factory(Arc::new( + CustomCastPhysicalExprAdapterFactory::new(Arc::new( + DefaultPhysicalExprAdapterFactory, + )), + )); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.register_table("good_table", Arc::new(table))?; + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///bad.parquet")?) + .infer_options(&ctx.state()) + .await? 
+ .with_schema(Arc::clone(&logical_schema)) + .with_expr_adapter_factory(Arc::new( + CustomCastPhysicalExprAdapterFactory::new(Arc::new( + DefaultPhysicalExprAdapterFactory, + )), + )); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.register_table("bad_table", Arc::new(table))?; + + println!("\n=== File with narrower schema is cast ==="); + let query = "SELECT id FROM good_table WHERE id > 1"; + println!("Query: {query}"); + let batches = ctx.sql(query).await?.collect().await?; + #[rustfmt::skip] + let expected = [ + "+----+", + "| id |", + "+----+", + "| 2 |", + "| 3 |", + "+----+", + ]; + arrow::util::pretty::print_batches(&batches)?; + assert_batches_eq!(expected, &batches); + + println!("\n=== File with wider schema errors ==="); + let query = "SELECT id FROM bad_table WHERE id > 1"; + println!("Query: {query}"); + match ctx.sql(query).await?.collect().await { + Ok(_) => panic!("Expected error for narrowing cast, but query succeeded"), + Err(e) => { + println!("Caught expected error: {e}"); + } + } + Ok(()) +} + +async fn write_data( + store: &dyn ObjectStore, + path: &Path, + batch: &RecordBatch, +) -> Result<()> { + let mut buf = vec![]; + let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), None)?; + writer.write(batch)?; + writer.close()?; + + let payload = PutPayload::from_bytes(buf.into()); + store.put(path, payload).await?; + Ok(()) +} + +/// Factory for creating CustomCastsPhysicalExprAdapter instances +#[derive(Debug)] +struct CustomCastPhysicalExprAdapterFactory { + inner: Arc, +} + +impl CustomCastPhysicalExprAdapterFactory { + fn new(inner: Arc) -> Self { + Self { inner } + } +} + +impl PhysicalExprAdapterFactory for CustomCastPhysicalExprAdapterFactory { + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + let inner = self + .inner + .create(logical_file_schema, Arc::clone(&physical_file_schema)); + Arc::new(CustomCastsPhysicalExprAdapter { +
physical_file_schema, + inner, + }) + } +} + +/// Custom PhysicalExprAdapter that rejects casts that are not widening (per `is_bigger_cast`) +/// and wraps an inner PhysicalExprAdapter for standard schema adaptation +#[derive(Debug, Clone)] +struct CustomCastsPhysicalExprAdapter { + physical_file_schema: SchemaRef, + inner: Arc, +} + +impl PhysicalExprAdapter for CustomCastsPhysicalExprAdapter { + fn rewrite(&self, mut expr: Arc) -> Result> { + // First delegate to the inner adapter to handle missing columns and discover any necessary casts + expr = self.inner.rewrite(expr)?; + // Now we can apply custom casting rules or even swap out all CastExprs for a custom cast kernel / expression + // For example, [DataFusion Comet](https://github.com/apache/datafusion-comet) has a [custom cast kernel](https://github.com/apache/datafusion-comet/blob/b4ac876ab420ed403ac7fc8e1b29f42f1f442566/native/spark-expr/src/conversion_funcs/cast.rs#L133-L138). + expr.transform(|expr| { + if let Some(cast) = expr.as_any().downcast_ref::() { + let input_data_type = cast.expr().data_type(&self.physical_file_schema)?; + let output_data_type = cast.data_type(&self.physical_file_schema)?; + if !cast.is_bigger_cast(&input_data_type) { + return not_impl_err!("Unsupported CAST from {input_data_type:?} to {output_data_type:?}") + } + } + Ok(Transformed::no(expr)) + }).data() + } + + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc { + Arc::new(Self { + inner: self.inner.with_partition_values(partition_values), + ..self.clone() + }) + } +} diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index e9a4d71b16339..67fe642fd46ee 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -81,6 +81,10 @@ impl FileFormat for TSVFileFormat { } } + fn compression_type(&self) -> Option { + None + } + async fn infer_schema( &self, state:
&dyn Session, diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 57a28aeca0de2..a5ee571a14764 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -59,6 +59,7 @@ use tempfile::tempdir; /// * [query_to_date]: execute queries against parquet files #[tokio::main] async fn main() -> Result<()> { + env_logger::init(); // The SessionContext is the main high level API for interacting with DataFusion let ctx = SessionContext::new(); read_parquet(&ctx).await?; diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/default_column_values.rs new file mode 100644 index 0000000000000..b504ef3aad6f1 --- /dev/null +++ b/datafusion-examples/examples/default_column_values.rs @@ -0,0 +1,398 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +use arrow::array::RecordBatch; +use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; +use async_trait::async_trait; + +use datafusion::assert_batches_eq; +use datafusion::catalog::memory::DataSourceExec; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion::common::DFSchema; +use datafusion::common::{Result, ScalarValue}; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::{FileScanConfigBuilder, ParquetSource}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::logical_expr::utils::conjunction; +use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; +use datafusion::parquet::arrow::ArrowWriter; +use datafusion::parquet::file::properties::WriterProperties; +use datafusion::physical_expr::expressions::{CastExpr, Column, Literal}; +use datafusion::physical_expr::schema_rewriter::{ + DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, +}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::prelude::{lit, SessionConfig}; +use futures::StreamExt; +use object_store::memory::InMemory; +use object_store::path::Path; +use object_store::{ObjectStore, PutPayload}; + +// Metadata key for storing default values in field metadata +const DEFAULT_VALUE_METADATA_KEY: &str = "example.default_value"; + +// Example showing how to implement custom default value handling for missing columns +// using field metadata and PhysicalExprAdapter. +// +// This example demonstrates how to: +// 1. Store default values in field metadata using a constant key +// 2. Create a custom PhysicalExprAdapter that reads these defaults +// 3. 
Inject default values for missing columns in filter predicates +// 4. Use the DefaultPhysicalExprAdapter as a fallback for standard schema adaptation +// 5. Wrap string default values in cast expressions for proper type conversion +// +// Important: PhysicalExprAdapter is specifically designed for rewriting filter predicates +// that get pushed down to file scans. For handling missing columns in projections, +// other mechanisms in DataFusion are used (like SchemaAdapter). +// +// The metadata-based approach provides a flexible way to store default values as strings +// and cast them to the appropriate types at query time. + +#[tokio::main] +async fn main() -> Result<()> { + println!("=== Creating example data with missing columns and default values ==="); + + // Create sample data where the logical schema has more columns than the physical schema + let (logical_schema, physical_schema, batch) = create_sample_data_with_defaults(); + + let store = InMemory::new(); + let buf = { + let mut buf = vec![]; + + let props = WriterProperties::builder() + .set_max_row_group_size(2) + .build(); + + let mut writer = + ArrowWriter::try_new(&mut buf, physical_schema.clone(), Some(props)) + .expect("creating writer"); + + writer.write(&batch).expect("Writing batch"); + writer.close().unwrap(); + buf + }; + let path = Path::from("example.parquet"); + let payload = PutPayload::from_bytes(buf.into()); + store.put(&path, payload).await?; + + // Create a custom table provider that handles missing columns with defaults + let table_provider = Arc::new(DefaultValueTableProvider::new(logical_schema)); + + // Set up query execution + let mut cfg = SessionConfig::new(); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + + // Register our table + ctx.register_table("example_table", table_provider)?; + + ctx.runtime_env().register_object_store( + ObjectStoreUrl::parse("memory://")?.as_ref(), + Arc::new(store), + ); + + 
println!("\n=== Demonstrating default value injection in filter predicates ==="); + let query = "SELECT id, name FROM example_table WHERE status = 'active' ORDER BY id"; + println!("Query: {query}"); + println!("Note: The 'status' column doesn't exist in the physical schema,"); + println!( + "but our adapter injects the default value 'active' for the filter predicate." + ); + + let batches = ctx.sql(query).await?.collect().await?; + + #[rustfmt::skip] + let expected = [ + "+----+-------+", + "| id | name |", + "+----+-------+", + "| 1 | Alice |", + "| 2 | Bob |", + "| 3 | Carol |", + "+----+-------+", + ]; + arrow::util::pretty::print_batches(&batches)?; + assert_batches_eq!(expected, &batches); + + println!("\n=== Key Insight ==="); + println!("This example demonstrates how PhysicalExprAdapter works:"); + println!("1. Physical schema only has 'id' and 'name' columns"); + println!("2. Logical schema has 'id', 'name', 'status', and 'priority' columns with defaults"); + println!("3. Our custom adapter intercepts filter expressions on missing columns"); + println!("4. Default values from metadata are injected as cast expressions"); + println!("5. 
The DefaultPhysicalExprAdapter handles other schema adaptations"); + println!("\nNote: PhysicalExprAdapter is specifically for filter predicates."); + println!("For projection columns, different mechanisms handle missing columns."); + + Ok(()) +} + +/// Create sample data with a logical schema that has default values in metadata +/// and a physical schema that's missing some columns +fn create_sample_data_with_defaults() -> (SchemaRef, SchemaRef, RecordBatch) { + // Create metadata for default values + let mut status_metadata = HashMap::new(); + status_metadata.insert(DEFAULT_VALUE_METADATA_KEY.to_string(), "active".to_string()); + + let mut priority_metadata = HashMap::new(); + priority_metadata.insert(DEFAULT_VALUE_METADATA_KEY.to_string(), "1".to_string()); + + // The logical schema includes all columns with their default values in metadata + // Note: We make the columns with defaults nullable to allow the default adapter to handle them + let logical_schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + Field::new("status", DataType::Utf8, true).with_metadata(status_metadata), + Field::new("priority", DataType::Int32, true).with_metadata(priority_metadata), + ]); + + // The physical schema only has some columns (simulating missing columns in storage) + let physical_schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ]); + + // Create sample data for the physical schema + let batch = RecordBatch::try_new( + Arc::new(physical_schema.clone()), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec![ + "Alice", "Bob", "Carol", + ])), + ], + ) + .unwrap(); + + (Arc::new(logical_schema), Arc::new(physical_schema), batch) +} + +/// Custom TableProvider that uses DefaultValuePhysicalExprAdapter +#[derive(Debug)] +struct DefaultValueTableProvider { + schema: SchemaRef, +} + +impl 
DefaultValueTableProvider { + fn new(schema: SchemaRef) -> Self { + Self { schema } + } +} + +#[async_trait] +impl TableProvider for DefaultValueTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()]) + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + let schema = self.schema.clone(); + let df_schema = DFSchema::try_from(schema.clone())?; + let filter = state.create_physical_expr( + conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)), + &df_schema, + )?; + + let parquet_source = ParquetSource::default() + .with_predicate(filter) + .with_pushdown_filters(true); + + let object_store_url = ObjectStoreUrl::parse("memory://")?; + let store = state.runtime_env().object_store(object_store_url)?; + + let mut files = vec![]; + let mut listing = store.list(None); + while let Some(file) = listing.next().await { + if let Ok(file) = file { + files.push(file); + } + } + + let file_group = files + .iter() + .map(|file| PartitionedFile::new(file.location.clone(), file.size)) + .collect(); + + let file_scan_config = FileScanConfigBuilder::new( + ObjectStoreUrl::parse("memory://")?, + self.schema.clone(), + Arc::new(parquet_source), + ) + .with_projection(projection.cloned()) + .with_limit(limit) + .with_file_group(file_group) + .with_expr_adapter(Some(Arc::new(DefaultValuePhysicalExprAdapterFactory) as _)); + + Ok(Arc::new(DataSourceExec::new(Arc::new( + file_scan_config.build(), + )))) + } +} + +/// Factory for creating DefaultValuePhysicalExprAdapter instances +#[derive(Debug)] +struct DefaultValuePhysicalExprAdapterFactory; + +impl PhysicalExprAdapterFactory for DefaultValuePhysicalExprAdapterFactory { + fn 
create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + let default_factory = DefaultPhysicalExprAdapterFactory; + let default_adapter = default_factory + .create(logical_file_schema.clone(), physical_file_schema.clone()); + + Arc::new(DefaultValuePhysicalExprAdapter { + logical_file_schema, + physical_file_schema, + default_adapter, + partition_values: Vec::new(), + }) + } +} + +/// Custom PhysicalExprAdapter that handles missing columns with default values from metadata +/// and wraps DefaultPhysicalExprAdapter for standard schema adaptation +#[derive(Debug)] +struct DefaultValuePhysicalExprAdapter { + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + default_adapter: Arc, + partition_values: Vec<(FieldRef, ScalarValue)>, +} + +impl PhysicalExprAdapter for DefaultValuePhysicalExprAdapter { + fn rewrite(&self, expr: Arc) -> Result> { + // First try our custom default value injection for missing columns + let rewritten = expr + .transform(|expr| { + self.inject_default_values( + expr, + &self.logical_file_schema, + &self.physical_file_schema, + ) + }) + .data()?; + + // Then apply the default adapter as a fallback to handle standard schema differences + // like type casting, partition column handling, etc. 
+ let default_adapter = if !self.partition_values.is_empty() { + self.default_adapter + .with_partition_values(self.partition_values.clone()) + } else { + self.default_adapter.clone() + }; + + default_adapter.rewrite(rewritten) + } + + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc { + Arc::new(DefaultValuePhysicalExprAdapter { + logical_file_schema: self.logical_file_schema.clone(), + physical_file_schema: self.physical_file_schema.clone(), + default_adapter: self.default_adapter.clone(), + partition_values, + }) + } +} + +impl DefaultValuePhysicalExprAdapter { + fn inject_default_values( + &self, + expr: Arc, + logical_file_schema: &Schema, + physical_file_schema: &Schema, + ) -> Result>> { + if let Some(column) = expr.as_any().downcast_ref::() { + let column_name = column.name(); + + // Check if this column exists in the physical schema + if physical_file_schema.index_of(column_name).is_err() { + // Column is missing from physical schema, check if logical schema has a default + if let Ok(logical_field) = + logical_file_schema.field_with_name(column_name) + { + if let Some(default_value_str) = + logical_field.metadata().get(DEFAULT_VALUE_METADATA_KEY) + { + // Create a string literal and wrap it in a cast expression + let default_literal = self.create_default_value_expr( + default_value_str, + logical_field.data_type(), + )?; + return Ok(Transformed::yes(default_literal)); + } + } + } + } + + // No transformation needed + Ok(Transformed::no(expr)) + } + + fn create_default_value_expr( + &self, + value_str: &str, + data_type: &DataType, + ) -> Result> { + // Create a string literal with the default value + let string_literal = + Arc::new(Literal::new(ScalarValue::Utf8(Some(value_str.to_string())))); + + // If the target type is already Utf8, return the string literal directly + if matches!(data_type, DataType::Utf8) { + return Ok(string_literal); + } + + // Otherwise, wrap the string literal in a cast expression + 
let cast_expr = Arc::new(CastExpr::new(string_literal, data_type.clone(), None)); + + Ok(cast_expr) + } +} diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index 21da359633452..425b0861fff7f 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::{ ColumnarValue, CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::result::Result as RResult; use std::sync::Arc; @@ -153,6 +154,38 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn output_ordering(&self, _input: &[ExprProperties]) -> Result { Ok(SortProperties::Unordered) } + + fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + expr, + signature, + return_type, + } = self; + name == &other.name + && expr == &other.expr + && signature == &other.signature + && return_type == &other.return_type + } + + fn hash_value(&self) -> u64 { + let Self { + name, + expr, + signature, + return_type, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + expr.hash(&mut hasher); + signature.hash(&mut hasher); + return_type.hash(&mut hasher); + hasher.finish() + } } impl ScalarFunctionWrapper { diff --git a/datafusion-examples/examples/json_shredding.rs b/datafusion-examples/examples/json_shredding.rs new file mode 100644 index 0000000000000..866e4a8a152c3 --- /dev/null +++ b/datafusion-examples/examples/json_shredding.rs @@ -0,0 +1,480 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{RecordBatch, StringArray}; +use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; +use async_trait::async_trait; + +use datafusion::assert_batches_eq; +use datafusion::catalog::memory::DataSourceExec; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::common::tree_node::{ + Transformed, TransformedResult, TreeNode, TreeNodeRecursion, +}; +use datafusion::common::{assert_contains, DFSchema, Result}; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::{FileScanConfigBuilder, ParquetSource}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::logical_expr::utils::conjunction; +use datafusion::logical_expr::{ + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, + TableProviderFilterPushDown, TableType, Volatility, +}; +use datafusion::parquet::arrow::ArrowWriter; +use datafusion::parquet::file::properties::WriterProperties; +use datafusion::physical_expr::schema_rewriter::{ + DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, +}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_expr::{expressions, ScalarFunctionExpr}; +use datafusion::physical_plan::ExecutionPlan; +use 
datafusion::prelude::{lit, SessionConfig}; +use datafusion::scalar::ScalarValue; +use futures::StreamExt; +use object_store::memory::InMemory; +use object_store::path::Path; +use object_store::{ObjectStore, PutPayload}; + +// Example showing how to implement custom filter rewriting for JSON shredding. +// +// JSON shredding is a technique for optimizing queries on semi-structured data +// by materializing commonly accessed fields into separate columns for better +// columnar storage performance. +// +// In this example, we have a table with both: +// - Original JSON data: data: '{"age": 30}' +// - Shredded flat columns: _data.name: "Alice" (extracted from JSON) +// +// Our custom TableProvider uses a PhysicalExprAdapter to rewrite +// expressions like `json_get_str('name', data)` to use the pre-computed +// flat column `_data.name` when available. This allows the query engine to: +// 1. Push down predicates for better filtering +// 2. Avoid expensive JSON parsing at query time +// 3. Leverage columnar storage benefits for the materialized fields +#[tokio::main] +async fn main() -> Result<()> { + println!("=== Creating example data with flat columns and underscore prefixes ==="); + + // Create sample data with flat columns using underscore prefixes + let (table_schema, batch) = create_sample_data(); + + let store = InMemory::new(); + let buf = { + let mut buf = vec![]; + + let props = WriterProperties::builder() + .set_max_row_group_size(2) + .build(); + + let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), Some(props)) + .expect("creating writer"); + + writer.write(&batch).expect("Writing batch"); + writer.close().unwrap(); + buf + }; + let path = Path::from("example.parquet"); + let payload = PutPayload::from_bytes(buf.into()); + store.put(&path, payload).await?; + + // Create a custom table provider that rewrites struct field access + let table_provider = Arc::new(ExampleTableProvider::new(table_schema)); + + // Set up query execution + let mut cfg = 
SessionConfig::new(); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + + // Register our table + ctx.register_table("structs", table_provider)?; + ctx.register_udf(ScalarUDF::new_from_impl(JsonGetStr::default())); + + ctx.runtime_env().register_object_store( + ObjectStoreUrl::parse("memory://")?.as_ref(), + Arc::new(store), + ); + + println!("\n=== Showing all data ==="); + let batches = ctx.sql("SELECT * FROM structs").await?.collect().await?; + arrow::util::pretty::print_batches(&batches)?; + + println!("\n=== Running query with flat column access and filter ==="); + let query = "SELECT json_get_str('age', data) as age FROM structs WHERE json_get_str('name', data) = 'Bob'"; + println!("Query: {query}"); + + let batches = ctx.sql(query).await?.collect().await?; + + #[rustfmt::skip] + let expected = [ + "+-----+", + "| age |", + "+-----+", + "| 25 |", + "+-----+", + ]; + arrow::util::pretty::print_batches(&batches)?; + assert_batches_eq!(expected, &batches); + + println!("\n=== Running explain analyze to confirm row group pruning ==="); + + let batches = ctx + .sql(&format!("EXPLAIN ANALYZE {query}")) + .await? + .collect() + .await?; + let plan = format!("{}", arrow::util::pretty::pretty_format_batches(&batches)?); + println!("{plan}"); + assert_contains!(&plan, "row_groups_pruned_statistics=1"); + assert_contains!(&plan, "pushdown_rows_pruned=1"); + + Ok(()) +} + +/// Create the example data with flat columns using underscore prefixes. 
+/// +/// This demonstrates the logical data structure: +/// - Table schema: What users see (just the 'data' JSON column) +/// - File schema: What's physically stored (both 'data' and materialized '_data.name') +/// +/// The naming convention uses underscore prefixes to indicate shredded columns: +/// - `data` -> original JSON column +/// - `_data.name` -> materialized field from JSON data.name +fn create_sample_data() -> (SchemaRef, RecordBatch) { + // The table schema only has the main data column - this is what users query against + let table_schema = Schema::new(vec![Field::new("data", DataType::Utf8, false)]); + + // The file schema has both the main column and the shredded flat column with underscore prefix + // This represents the actual physical storage with pre-computed columns + let file_schema = Schema::new(vec![ + Field::new("data", DataType::Utf8, false), // Original JSON data + Field::new("_data.name", DataType::Utf8, false), // Materialized name field + ]); + + let batch = create_sample_record_batch(&file_schema); + + (Arc::new(table_schema), batch) +} + +/// Create the actual RecordBatch with sample data +fn create_sample_record_batch(file_schema: &Schema) -> RecordBatch { + // Build a RecordBatch with flat columns + let data_array = StringArray::from(vec![ + r#"{"age": 30}"#, + r#"{"age": 25}"#, + r#"{"age": 35}"#, + r#"{"age": 22}"#, + ]); + let names_array = StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"]); + + RecordBatch::try_new( + Arc::new(file_schema.clone()), + vec![Arc::new(data_array), Arc::new(names_array)], + ) + .unwrap() +} + +/// Custom TableProvider that uses a StructFieldRewriter +#[derive(Debug)] +struct ExampleTableProvider { + schema: SchemaRef, +} + +impl ExampleTableProvider { + fn new(schema: SchemaRef) -> Self { + Self { schema } + } +} + +#[async_trait] +impl TableProvider for ExampleTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn 
table_type(&self) -> TableType { + TableType::Base + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + // Implementers can choose to mark these filters as exact or inexact. + // If marked as exact they cannot have false positives and must always be applied. + // If marked as Inexact they can have false positives and at runtime the rewriter + // can decide to not rewrite / ignore some filters since they will be re-evaluated upstream. + // For the purposes of this example we mark them as Exact to demonstrate the rewriter is working and the filtering is not being re-evaluated upstream. + Ok(vec![TableProviderFilterPushDown::Exact; filters.len()]) + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + let schema = self.schema.clone(); + let df_schema = DFSchema::try_from(schema.clone())?; + let filter = state.create_physical_expr( + conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)), + &df_schema, + )?; + + let parquet_source = ParquetSource::default() + .with_predicate(filter) + .with_pushdown_filters(true); + + let object_store_url = ObjectStoreUrl::parse("memory://")?; + + let store = state.runtime_env().object_store(object_store_url)?; + + let mut files = vec![]; + let mut listing = store.list(None); + while let Some(file) = listing.next().await { + if let Ok(file) = file { + files.push(file); + } + } + + let file_group = files + .iter() + .map(|file| PartitionedFile::new(file.location.clone(), file.size)) + .collect(); + + let file_scan_config = FileScanConfigBuilder::new( + ObjectStoreUrl::parse("memory://")?, + schema, + Arc::new(parquet_source), + ) + .with_projection(projection.cloned()) + .with_limit(limit) + .with_file_group(file_group) + // if the rewriter needs a reference to the table schema you can bind self.schema() here + .with_expr_adapter(Some(Arc::new(ShreddedJsonRewriterFactory) as _)); + + 
Ok(Arc::new(DataSourceExec::new(Arc::new( + file_scan_config.build(), + )))) + } +} + +/// Scalar UDF that uses serde_json to access json fields +#[derive(Debug)] +pub struct JsonGetStr { + signature: Signature, + aliases: [String; 1], +} + +impl Default for JsonGetStr { + fn default() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: ["json_get_str".to_string()], + } + } +} + +impl ScalarUDFImpl for JsonGetStr { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.aliases[0].as_str() + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Utf8) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + assert!( + args.args.len() == 2, + "json_get_str requires exactly 2 arguments" + ); + let key = match &args.args[0] { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(key))) => key, + _ => { + return Err(datafusion::error::DataFusionError::Execution( + "json_get_str first argument must be a string".to_string(), + )) + } + }; + // We expect a string array that contains JSON strings + let json_array = match &args.args[1] { + ColumnarValue::Array(array) => array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + datafusion::error::DataFusionError::Execution( + "json_get_str second argument must be a string array".to_string(), + ) + })?, + _ => { + return Err(datafusion::error::DataFusionError::Execution( + "json_get_str second argument must be a string array".to_string(), + )) + } + }; + let values = json_array + .iter() + .map(|value| { + value.and_then(|v| { + let json_value: serde_json::Value = + serde_json::from_str(v).unwrap_or_default(); + json_value.get(key).map(|v| v.to_string()) + }) + }) + .collect::(); + Ok(ColumnarValue::Array(Arc::new(values))) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +/// Factory for creating ShreddedJsonRewriter instances +#[derive(Debug)] 
+struct ShreddedJsonRewriterFactory; + +impl PhysicalExprAdapterFactory for ShreddedJsonRewriterFactory { + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + let default_factory = DefaultPhysicalExprAdapterFactory; + let default_adapter = default_factory + .create(logical_file_schema.clone(), physical_file_schema.clone()); + + Arc::new(ShreddedJsonRewriter { + logical_file_schema, + physical_file_schema, + default_adapter, + partition_values: Vec::new(), + }) + } +} + +/// Rewriter that converts json_get_str calls to direct flat column references +/// and wraps DefaultPhysicalExprAdapter for standard schema adaptation +#[derive(Debug)] +struct ShreddedJsonRewriter { + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + default_adapter: Arc, + partition_values: Vec<(FieldRef, ScalarValue)>, +} + +impl PhysicalExprAdapter for ShreddedJsonRewriter { + fn rewrite(&self, expr: Arc) -> Result> { + // First try our custom JSON shredding rewrite + let rewritten = expr + .transform(|expr| self.rewrite_impl(expr, &self.physical_file_schema)) + .data()?; + + // Then apply the default adapter as a fallback to handle standard schema differences + // like type casting, missing columns, and partition column handling + let default_adapter = if !self.partition_values.is_empty() { + self.default_adapter + .with_partition_values(self.partition_values.clone()) + } else { + self.default_adapter.clone() + }; + + default_adapter.rewrite(rewritten) + } + + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc { + Arc::new(ShreddedJsonRewriter { + logical_file_schema: self.logical_file_schema.clone(), + physical_file_schema: self.physical_file_schema.clone(), + default_adapter: self.default_adapter.clone(), + partition_values, + }) + } +} + +impl ShreddedJsonRewriter { + fn rewrite_impl( + &self, + expr: Arc, + physical_file_schema: &Schema, + ) -> Result>> { + if let Some(func) 
= expr.as_any().downcast_ref::() { + if func.name() == "json_get_str" && func.args().len() == 2 { + // Get the key from the first argument + if let Some(literal) = func.args()[0] + .as_any() + .downcast_ref::() + { + if let ScalarValue::Utf8(Some(field_name)) = literal.value() { + // Get the column from the second argument + if let Some(column) = func.args()[1] + .as_any() + .downcast_ref::() + { + let column_name = column.name(); + // Check if there's a flat column with underscore prefix + let flat_column_name = format!("_{column_name}.{field_name}"); + + if let Ok(flat_field_index) = + physical_file_schema.index_of(&flat_column_name) + { + let flat_field = + physical_file_schema.field(flat_field_index); + + if flat_field.data_type() == &DataType::Utf8 { + // Replace the whole expression with a direct column reference + let new_expr = Arc::new(expressions::Column::new( + &flat_column_name, + flat_field_index, + )) + as Arc; + + return Ok(Transformed { + data: new_expr, + tnr: TreeNodeRecursion::Stop, + transformed: true, + }); + } + } + } + } + } + } + } + Ok(Transformed::no(expr)) + } +} diff --git a/datafusion-examples/examples/parquet_embedded_index.rs b/datafusion-examples/examples/parquet_embedded_index.rs new file mode 100644 index 0000000000000..5191ae48b3af7 --- /dev/null +++ b/datafusion-examples/examples/parquet_embedded_index.rs @@ -0,0 +1,477 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Embedding and using a custom index in Parquet files +//! +//! # Background +//! +//! This example shows how to add an application‑specific index to an Apache +//! Parquet file without modifying the Parquet format itself. The resulting +//! files can be read by any standard Parquet reader, which will simply +//! ignore the extra index data. +//! +//! A “distinct value” index, similar to a ["set" Skip Index in ClickHouse], +//! is stored in a custom binary format within the parquet file. Only the +//! location of the index is stored in Parquet footer key/value metadata. +//! This approach is more efficient than storing the index itself in the footer +//! metadata because the footer must be read and parsed by all readers, +//! even those that do not use the index. +//! +//! This example uses a file level index for skipping entire files, but any +//! index can be stored using the same techniques and used to skip row groups, +//! data pages, or rows using the APIs on [`TableProvider`] and [`ParquetSource`]. +//! +//! The resulting Parquet file layout is as follows: +//! +//! ```text +//! ┌──────────────────────┐ +//! │┌───────────────────┐ │ +//! ││ DataPage │ │ +//! │└───────────────────┘ │ +//! Standard Parquet │┌───────────────────┐ │ +//! Data Pages ││ DataPage │ │ +//! │└───────────────────┘ │ +//! │ ... │ +//! │┌───────────────────┐ │ +//! ││ DataPage │ │ +//! │└───────────────────┘ │ +//! │┏━━━━━━━━━━━━━━━━━━━┓ │ +//! Non standard │┃ ┃ │ +//! index (ignored by │┃Custom Binary Index┃ │ +//! other Parquet │┃ (Distinct Values) ┃◀│─ ─ ─ +//! 
readers) │┃ ┃ │ │ +//! │┗━━━━━━━━━━━━━━━━━━━┛ │ +//! Standard Parquet │┏━━━━━━━━━━━━━━━━━━━┓ │ │ key/value metadata +//! Page Index │┃ Page Index ┃ │ contains location +//! │┗━━━━━━━━━━━━━━━━━━━┛ │ │ of special index +//! │╔═══════════════════╗ │ +//! │║ Parquet Footer w/ ║ │ │ +//! │║ Metadata ║ ┼ ─ ─ +//! │║ (Thrift Encoded) ║ │ +//! │╚═══════════════════╝ │ +//! └──────────────────────┘ +//! +//! Parquet File +//! +//! # High Level Flow +//! +//! To create a custom Parquet index: +//! +//! 1. Compute the index and serialize it to a binary format. +//! +//! 2. Write the Parquet file with: +//! - regular data pages +//! - the serialized index inline +//! - footer key/value metadata entry to locate the index +//! +//! To read and use the index: +//! +//! 1. Read and deserialize the file’s footer to locate the index. +//! +//! 2. Read and deserialize the index. +//! +//! 3. Create a `TableProvider` that knows how to use the index to quickly find +//! the relevant files, row groups, data pages or rows based on pushed down +//! filters. +//! +//! # FAQ: Why do other Parquet readers skip over the custom index? +//! +//! The flow for reading a parquet file is: +//! +//! 1. Seek to the end of the file and read the last 8 bytes (a 4‑byte +//! little‑endian footer length followed by the `PAR1` magic bytes). +//! +//! 2. Seek backwards by that length to parse the Thrift‑encoded footer +//! metadata (including key/value pairs). +//! +//! 3. Read data required for decoding such as data pages based on the offsets +//! encoded in the metadata. +//! +//! Since parquet readers do not scan from the start of the file they will not read +//! data in the file unless it is explicitly referenced in the footer metadata. +//! +//! Thus other readers will encounter and ignore an unknown key +//! (`distinct_index_offset`) in the footer key/value metadata. Unless they +//! know how to use that information, they will not attempt to read or interpret +//! the bytes that make up the index. +//! 
["set" Skip Index in ClickHouse]: https://clickhouse.com/docs/optimize/skipping-indexes#set + +use arrow::array::{ArrayRef, StringArray}; +use arrow::record_batch::RecordBatch; +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use async_trait::async_trait; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::common::{exec_err, HashMap, HashSet, Result}; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::memory::DataSourceExec; +use datafusion::datasource::physical_plan::{FileScanConfigBuilder, ParquetSource}; +use datafusion::datasource::TableType; +use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::logical_expr::{Operator, TableProviderFilterPushDown}; +use datafusion::parquet::arrow::ArrowWriter; +use datafusion::parquet::errors::ParquetError; +use datafusion::parquet::file::metadata::{FileMetaData, KeyValue}; +use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::prelude::*; +use datafusion::scalar::ScalarValue; +use std::fs::{read_dir, File}; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tempfile::TempDir; + +/// An index of distinct values for a single column +/// +/// In this example the index is a simple set of strings, but in a real +/// application it could be any arbitrary data structure. +/// +/// Also, this example indexes the distinct values for an entire file +/// but a real application could create multiple indexes for multiple +/// row groups and/or columns, depending on the use case. 
+#[derive(Debug, Clone)] +struct DistinctIndex { + inner: HashSet, +} + +impl DistinctIndex { + /// Create a DistinctIndex from an iterator of strings + pub fn new>(iter: I) -> Self { + Self { + inner: iter.into_iter().collect(), + } + } + + /// Returns true if the index contains the given value + pub fn contains(&self, value: &str) -> bool { + self.inner.contains(value) + } + + /// Serialize the distinct index to a writer as bytes + /// + /// In this example, we use a simple newline-separated format, + /// but a real application can use any arbitrary binary format. + /// + /// Note that we must use the ArrowWriter to write the index so that its + /// internal accounting of offsets can correctly track the actual size of + /// the file. If we wrote directly to the underlying writer, the PageIndex + /// written right before the would be incorrect as they would not account + /// for the extra bytes written. + fn serialize( + &self, + arrow_writer: &mut ArrowWriter, + ) -> Result<()> { + let serialized = self + .inner + .iter() + .map(|s| s.as_str()) + .collect::>() + .join("\n"); + let index_bytes = serialized.into_bytes(); + + // Set the offset for the index + let offset = arrow_writer.bytes_written(); + let index_len = index_bytes.len() as u64; + + println!("Writing custom index at offset: {offset}, length: {index_len}"); + // Write the index magic and length to the file + arrow_writer.write_all(INDEX_MAGIC)?; + arrow_writer.write_all(&index_len.to_le_bytes())?; + + // Write the index bytes + arrow_writer.write_all(&index_bytes)?; + + // Append metadata about the index to the Parquet file footer + arrow_writer.append_key_value_metadata(KeyValue::new( + "distinct_index_offset".to_string(), + offset.to_string(), + )); + Ok(()) + } + + /// Read the distinct values index from a reader at the given offset and length + pub fn new_from_reader(mut reader: R, offset: u64) -> Result { + reader.seek(SeekFrom::Start(offset))?; + + let mut magic_buf = [0u8; 4]; + 
reader.read_exact(&mut magic_buf)?; + if magic_buf != INDEX_MAGIC { + return exec_err!("Invalid index magic number at offset {offset}"); + } + + let mut len_buf = [0u8; 8]; + reader.read_exact(&mut len_buf)?; + let stored_len = u64::from_le_bytes(len_buf) as usize; + + let mut index_buf = vec![0u8; stored_len]; + reader.read_exact(&mut index_buf)?; + + let Ok(s) = String::from_utf8(index_buf) else { + return exec_err!("Invalid UTF-8 in index data"); + }; + + Ok(Self { + inner: s.lines().map(|s| s.to_string()).collect(), + }) + } +} + +/// DataFusion [`TableProvider]` that reads Parquet files and uses a +/// `DistinctIndex` to prune files based on pushed down filters. +#[derive(Debug)] +struct DistinctIndexTable { + /// The schema of the table + schema: SchemaRef, + /// Key is file name, value is DistinctIndex for that file + files_and_index: HashMap, + /// Directory containing the Parquet files + dir: PathBuf, +} + +impl DistinctIndexTable { + /// Create a new DistinctIndexTable for files in the given directory + /// + /// Scans the directory, reading the `DistinctIndex` from each file + fn try_new(dir: impl Into, schema: SchemaRef) -> Result { + let dir = dir.into(); + let mut index = HashMap::new(); + + for entry in read_dir(&dir)? 
{ + let path = entry?.path(); + if path.extension().and_then(|s| s.to_str()) != Some("parquet") { + continue; + } + let file_name = path.file_name().unwrap().to_string_lossy().to_string(); + + let distinct_set = read_distinct_index(&path)?; + + println!("Read distinct index for {file_name}: {file_name:?}"); + index.insert(file_name, distinct_set); + } + + Ok(Self { + schema, + files_and_index: index, + dir, + }) + } +} + +/// Wrapper around ArrowWriter to write Parquet files with an embedded index +struct IndexedParquetWriter { + writer: ArrowWriter, +} + +/// Magic bytes to identify our custom index format +const INDEX_MAGIC: &[u8] = b"IDX1"; + +impl IndexedParquetWriter { + pub fn try_new(sink: W, schema: Arc) -> Result { + let writer = ArrowWriter::try_new(sink, schema, None)?; + Ok(Self { writer }) + } + + /// Write a RecordBatch to the Parquet file + pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { + self.writer.write(batch)?; + Ok(()) + } + + /// Flush the current row group + pub fn flush(&mut self) -> Result<()> { + self.writer.flush()?; + Ok(()) + } + + /// Close the Parquet file, flushing any remaining data + pub fn close(self) -> Result<()> { + self.writer.close()?; + Ok(()) + } + + /// write the DistinctIndex to the Parquet file + pub fn write_index(&mut self, index: &DistinctIndex) -> Result<()> { + index.serialize(&mut self.writer) + } +} + +/// Write a Parquet file with a single column "category" containing the +/// strings in `values` and a DistinctIndex for that column. 
+fn write_file_with_index(path: &Path, values: &[&str]) -> Result<()> { + // form an input RecordBatch with the string values + let field = Field::new("category", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field.clone()])); + let arr: ArrayRef = Arc::new(StringArray::from(values.to_vec())); + let batch = RecordBatch::try_new(schema.clone(), vec![arr])?; + + // compute the distinct index + let distinct_index: DistinctIndex = + DistinctIndex::new(values.iter().map(|s| s.to_string())); + + let file = File::create(path)?; + + let mut writer = IndexedParquetWriter::try_new(file, schema.clone())?; + writer.write(&batch)?; + writer.flush()?; + writer.write_index(&distinct_index)?; + writer.close()?; + + println!("Finished writing file to {}", path.display()); + Ok(()) +} + +/// Read a `DistinctIndex` from a Parquet file +fn read_distinct_index(path: &Path) -> Result { + let file = File::open(path)?; + + let file_size = file.metadata()?.len(); + println!("Reading index from {} (size: {file_size})", path.display(),); + + let reader = SerializedFileReader::new(file.try_clone()?)?; + let meta = reader.metadata().file_metadata(); + + let offset = get_key_value(meta, "distinct_index_offset") + .ok_or_else(|| ParquetError::General("Missing index offset".into()))? 
+ .parse::() + .map_err(|e| ParquetError::General(e.to_string()))?; + + println!("Reading index at offset: {offset}, length"); + DistinctIndex::new_from_reader(file, offset) +} + +/// Returns the value of a named key from the Parquet file metadata +/// +/// Returns None if the key is not found +fn get_key_value<'a>(file_meta_data: &'a FileMetaData, key: &'_ str) -> Option<&'a str> { + let kvs = file_meta_data.key_value_metadata()?; + let kv = kvs.iter().find(|kv| kv.key == key)?; + kv.value.as_deref() +} + +/// Implement TableProvider for DistinctIndexTable, using the distinct index to prune files +#[async_trait] +impl TableProvider for DistinctIndexTable { + fn as_any(&self) -> &dyn std::any::Any { + self + } + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + fn table_type(&self) -> TableType { + TableType::Base + } + + /// Prune files before reading: only keep files whose distinct set + /// contains the filter value + async fn scan( + &self, + _ctx: &dyn Session, + _proj: Option<&Vec>, + filters: &[Expr], + _limit: Option, + ) -> Result> { + // This example only handles filters of the form + // `category = 'X'` where X is a string literal + // + // You can use `PruningPredicate` for much more general range and + // equality analysis or write your own custom logic. + let mut target: Option<&str> = None; + + if filters.len() == 1 { + if let Expr::BinaryExpr(expr) = &filters[0] { + if expr.op == Operator::Eq { + if let ( + Expr::Column(c), + Expr::Literal(ScalarValue::Utf8(Some(v)), _), + ) = (&*expr.left, &*expr.right) + { + if c.name == "category" { + println!("Filtering for category: {v}"); + target = Some(v); + } + } + } + } + } + // Determine which files to scan + let files_to_scan: Vec<_> = self + .files_and_index + .iter() + .filter_map(|(f, distinct_index)| { + // keep file if no target or target is in the distinct set + if target.is_none() || distinct_index.contains(target?) 
{ + Some(f) + } else { + None + } + }) + .collect(); + + println!("Scanning only files: {files_to_scan:?}"); + + // Build ParquetSource to actually read the files + let url = ObjectStoreUrl::parse("file://")?; + let source = Arc::new(ParquetSource::default().with_enable_page_index(true)); + let mut builder = FileScanConfigBuilder::new(url, self.schema.clone(), source); + for file in files_to_scan { + let path = self.dir.join(file); + let len = std::fs::metadata(&path)?.len(); + // If the index contained information about row groups or pages, + // you could also pass that information here to further prune + // the data read from the file. + let partitioned_file = + PartitionedFile::new(path.to_str().unwrap().to_string(), len); + builder = builder.with_file(partitioned_file); + } + Ok(DataSourceExec::from_data_source(builder.build())) + } + + /// Tell DataFusion that we can handle filters on the "category" column + fn supports_filters_pushdown( + &self, + fs: &[&Expr], + ) -> Result> { + // Mark as inexact since pruning is file‑granular + Ok(vec![TableProviderFilterPushDown::Inexact; fs.len()]) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + // 1. Create temp dir and write 3 Parquet files with different category sets + let tmp = TempDir::new()?; + let dir = tmp.path(); + write_file_with_index(&dir.join("a.parquet"), &["foo", "bar", "foo"])?; + write_file_with_index(&dir.join("b.parquet"), &["baz", "qux"])?; + write_file_with_index(&dir.join("c.parquet"), &["foo", "quux", "quux"])?; + + // 2. Register our custom TableProvider + let field = Field::new("category", DataType::Utf8, false); + let schema_ref = Arc::new(Schema::new(vec![field])); + let provider = Arc::new(DistinctIndexTable::try_new(dir, schema_ref.clone())?); + + let ctx = SessionContext::new(); + ctx.register_table("t", provider)?; + + // 3. Run a query: only files containing 'foo' get scanned. The rest are pruned. + // based on the distinct index. 
+ let df = ctx.sql("SELECT * FROM t WHERE category = 'foo'").await?; + df.show().await?; + + Ok(()) +} diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index e5ae3cc86bfe5..a9c0d2c4dab3b 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -71,7 +71,7 @@ use url::Url; /// (using the same underlying APIs) /// /// For a more advanced example of using an index to prune row groups within a -/// file, see the (forthcoming) `advanced_parquet_index` example. +/// file, see the `advanced_parquet_index` example. /// /// # Diagram /// diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_analysis.rs index d3826026a9725..4ff669faf1d0c 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_analysis.rs @@ -274,7 +274,10 @@ from for table in tables { ctx.register_table( table.name, - Arc::new(MemTable::try_new(Arc::new(table.schema.clone()), vec![])?), + Arc::new(MemTable::try_new( + Arc::new(table.schema.clone()), + vec![vec![]], + )?), )?; } // We can create a LogicalPlan from a SQL query like this diff --git a/datafusion/catalog/src/default_table_source.rs b/datafusion/catalog/src/default_table_source.rs index 9db8242caa999..c61c7919ea5d9 100644 --- a/datafusion/catalog/src/default_table_source.rs +++ b/datafusion/catalog/src/default_table_source.rs @@ -33,8 +33,6 @@ use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource, TableType} /// /// It is used so logical plans in the `datafusion_expr` crate do not have a /// direct dependency on physical plans, such as [`TableProvider`]s. 
-/// -/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html pub struct DefaultTableSource { /// table provider pub table_provider: Arc, diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs index e996e1974d9e8..63b626fb6cce6 100644 --- a/datafusion/catalog/src/memory/table.rs +++ b/datafusion/catalog/src/memory/table.rs @@ -67,8 +67,16 @@ pub struct MemTable { } impl MemTable { - /// Create a new in-memory table from the provided schema and record batches + /// Create a new in-memory table from the provided schema and record batches. + /// + /// Requires at least one partition. To construct an empty `MemTable`, pass + /// `vec![vec![]]` as the `partitions` argument, this represents one partition with + /// no batches. pub fn try_new(schema: SchemaRef, partitions: Vec>) -> Result { + if partitions.is_empty() { + return plan_err!("No partitions provided, expected at least one partition"); + } + for batches in partitions.iter().flatten() { let batches_schema = batches.schema(); if !schema.contains(&batches_schema) { diff --git a/datafusion/catalog/src/stream.rs b/datafusion/catalog/src/stream.rs index 99c432b738e5b..0fab9beba81f9 100644 --- a/datafusion/catalog/src/stream.rs +++ b/datafusion/catalog/src/stream.rs @@ -435,6 +435,6 @@ impl DataSink for StreamWrite { write_task .join_unwind() .await - .map_err(DataFusionError::ExecutionJoin)? + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))? 
} } diff --git a/datafusion/catalog/src/streaming.rs b/datafusion/catalog/src/streaming.rs index 654e6755d7d4c..6ab95266e49d0 100644 --- a/datafusion/catalog/src/streaming.rs +++ b/datafusion/catalog/src/streaming.rs @@ -20,15 +20,17 @@ use std::any::Any; use std::sync::Arc; -use arrow::datatypes::SchemaRef; -use async_trait::async_trait; - use crate::Session; use crate::TableProvider; -use datafusion_common::{plan_err, Result}; -use datafusion_expr::{Expr, TableType}; + +use arrow::datatypes::SchemaRef; +use datafusion_common::{plan_err, DFSchema, Result}; +use datafusion_expr::{Expr, SortExpr, TableType}; +use datafusion_physical_expr::{create_physical_sort_exprs, LexOrdering}; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::ExecutionPlan; + +use async_trait::async_trait; use log::debug; /// A [`TableProvider`] that streams a set of [`PartitionStream`] @@ -37,6 +39,7 @@ pub struct StreamingTable { schema: SchemaRef, partitions: Vec>, infinite: bool, + sort_order: Vec, } impl StreamingTable { @@ -60,13 +63,21 @@ impl StreamingTable { schema, partitions, infinite: false, + sort_order: vec![], }) } + /// Sets streaming table can be infinite. pub fn with_infinite_table(mut self, infinite: bool) -> Self { self.infinite = infinite; self } + + /// Sets the existing ordering of streaming table. + pub fn with_sort_order(mut self, sort_order: Vec) -> Self { + self.sort_order = sort_order; + self + } } #[async_trait] @@ -85,16 +96,25 @@ impl TableProvider for StreamingTable { async fn scan( &self, - _state: &dyn Session, + state: &dyn Session, projection: Option<&Vec>, _filters: &[Expr], limit: Option, ) -> Result> { + let physical_sort = if !self.sort_order.is_empty() { + let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?; + let eqp = state.execution_props(); + + create_physical_sort_exprs(&self.sort_order, &df_schema, eqp)? 
+ } else { + vec![] + }; + Ok(Arc::new(StreamingTableExec::try_new( Arc::clone(&self.schema), self.partitions.clone(), projection, - None, + LexOrdering::new(physical_sort), self.infinite, limit, )?)) diff --git a/datafusion/common-runtime/Cargo.toml b/datafusion/common-runtime/Cargo.toml index 7ddc021e640c9..905a19747da93 100644 --- a/datafusion/common-runtime/Cargo.toml +++ b/datafusion/common-runtime/Cargo.toml @@ -43,4 +43,4 @@ log = { workspace = true } tokio = { workspace = true } [dev-dependencies] -tokio = { version = "1.45", features = ["rt", "rt-multi-thread", "time"] } +tokio = { version = "1.46", features = ["rt", "rt-multi-thread", "time"] } diff --git a/datafusion/common-runtime/src/common.rs b/datafusion/common-runtime/src/common.rs index e7aba1d455ee6..cebd6e04cd1b1 100644 --- a/datafusion/common-runtime/src/common.rs +++ b/datafusion/common-runtime/src/common.rs @@ -68,15 +68,28 @@ impl SpawnedTask { } /// Joins the task and unwinds the panic if it happens. - pub async fn join_unwind(self) -> Result { + pub async fn join_unwind(mut self) -> Result { + self.join_unwind_mut().await + } + + /// Joins the task using a mutable reference and unwinds the panic if it happens. + /// + /// This method is similar to [`join_unwind`](Self::join_unwind), but takes a mutable + /// reference instead of consuming `self`. This allows the `SpawnedTask` to remain + /// usable after the call. + /// + /// If called multiple times on the same task: + /// - If the task is still running, it will continue waiting for completion + /// - If the task has already completed successfully, subsequent calls will + /// continue to return the same `JoinError` indicating the task is finished + /// - If the task panicked, the first call will resume the panic, and the + /// program will not reach subsequent calls + pub async fn join_unwind_mut(&mut self) -> Result { self.await.map_err(|e| { // `JoinError` can be caused either by panic or cancellation. 
We have to handle panics: if e.is_panic() { std::panic::resume_unwind(e.into_panic()); } else { - // Cancellation may be caused by two reasons: - // 1. Abort is called, but since we consumed `self`, it's not our case (`JoinHandle` not accessible outside). - // 2. The runtime is shutting down. log::warn!("SpawnedTask was polled during shutdown"); e } diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index b356f249b79bb..83e539e31d726 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -40,9 +40,15 @@ name = "datafusion_common" [features] avro = ["apache-avro"] backtrace = [] +parquet_encryption = [ + "parquet", + "parquet/encryption", + "dep:hex", +] pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] force_hash_collisions = [] recursive_protection = ["dep:recursive"] +parquet = ["dep:parquet"] [dependencies] ahash = { workspace = true } @@ -58,7 +64,7 @@ base64 = "0.22.1" chrono = { workspace = true } half = { workspace = true } hashbrown = { workspace = true } -hex = "0.4.3" +hex = { workspace = true, optional = true } indexmap = { workspace = true } libc = "0.2.174" log = { workspace = true } diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index 28202c6684b50..68b753a6678a4 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -22,8 +22,9 @@ use crate::{downcast_value, Result}; use arrow::array::{ - BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray, - LargeStringArray, StringViewArray, UInt16Array, + BinaryViewArray, DurationMicrosecondArray, DurationMillisecondArray, + DurationNanosecondArray, DurationSecondArray, Float16Array, Int16Array, Int8Array, + LargeBinaryArray, LargeStringArray, StringViewArray, UInt16Array, }; use arrow::{ array::{ @@ -41,246 +42,272 @@ use arrow::{ datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType}, }; -// Downcast ArrayRef to Date32Array +// Downcast Array to Date32Array pub fn as_date32_array(array: &dyn 
Array) -> Result<&Date32Array> { Ok(downcast_value!(array, Date32Array)) } -// Downcast ArrayRef to Date64Array +// Downcast Array to Date64Array pub fn as_date64_array(array: &dyn Array) -> Result<&Date64Array> { Ok(downcast_value!(array, Date64Array)) } -// Downcast ArrayRef to StructArray +// Downcast Array to StructArray pub fn as_struct_array(array: &dyn Array) -> Result<&StructArray> { Ok(downcast_value!(array, StructArray)) } -// Downcast ArrayRef to Int8Array +// Downcast Array to Int8Array pub fn as_int8_array(array: &dyn Array) -> Result<&Int8Array> { Ok(downcast_value!(array, Int8Array)) } -// Downcast ArrayRef to UInt8Array +// Downcast Array to UInt8Array pub fn as_uint8_array(array: &dyn Array) -> Result<&UInt8Array> { Ok(downcast_value!(array, UInt8Array)) } -// Downcast ArrayRef to Int16Array +// Downcast Array to Int16Array pub fn as_int16_array(array: &dyn Array) -> Result<&Int16Array> { Ok(downcast_value!(array, Int16Array)) } -// Downcast ArrayRef to UInt16Array +// Downcast Array to UInt16Array pub fn as_uint16_array(array: &dyn Array) -> Result<&UInt16Array> { Ok(downcast_value!(array, UInt16Array)) } -// Downcast ArrayRef to Int32Array +// Downcast Array to Int32Array pub fn as_int32_array(array: &dyn Array) -> Result<&Int32Array> { Ok(downcast_value!(array, Int32Array)) } -// Downcast ArrayRef to UInt32Array +// Downcast Array to UInt32Array pub fn as_uint32_array(array: &dyn Array) -> Result<&UInt32Array> { Ok(downcast_value!(array, UInt32Array)) } -// Downcast ArrayRef to Int64Array +// Downcast Array to Int64Array pub fn as_int64_array(array: &dyn Array) -> Result<&Int64Array> { Ok(downcast_value!(array, Int64Array)) } -// Downcast ArrayRef to UInt64Array +// Downcast Array to UInt64Array pub fn as_uint64_array(array: &dyn Array) -> Result<&UInt64Array> { Ok(downcast_value!(array, UInt64Array)) } -// Downcast ArrayRef to Decimal128Array +// Downcast Array to Decimal128Array pub fn as_decimal128_array(array: &dyn Array) -> 
Result<&Decimal128Array> { Ok(downcast_value!(array, Decimal128Array)) } -// Downcast ArrayRef to Decimal256Array +// Downcast Array to Decimal256Array pub fn as_decimal256_array(array: &dyn Array) -> Result<&Decimal256Array> { Ok(downcast_value!(array, Decimal256Array)) } -// Downcast ArrayRef to Float16Array +// Downcast Array to Float16Array pub fn as_float16_array(array: &dyn Array) -> Result<&Float16Array> { Ok(downcast_value!(array, Float16Array)) } -// Downcast ArrayRef to Float32Array +// Downcast Array to Float32Array pub fn as_float32_array(array: &dyn Array) -> Result<&Float32Array> { Ok(downcast_value!(array, Float32Array)) } -// Downcast ArrayRef to Float64Array +// Downcast Array to Float64Array pub fn as_float64_array(array: &dyn Array) -> Result<&Float64Array> { Ok(downcast_value!(array, Float64Array)) } -// Downcast ArrayRef to StringArray +// Downcast Array to StringArray pub fn as_string_array(array: &dyn Array) -> Result<&StringArray> { Ok(downcast_value!(array, StringArray)) } -// Downcast ArrayRef to StringViewArray +// Downcast Array to StringViewArray pub fn as_string_view_array(array: &dyn Array) -> Result<&StringViewArray> { Ok(downcast_value!(array, StringViewArray)) } -// Downcast ArrayRef to LargeStringArray +// Downcast Array to LargeStringArray pub fn as_large_string_array(array: &dyn Array) -> Result<&LargeStringArray> { Ok(downcast_value!(array, LargeStringArray)) } -// Downcast ArrayRef to BooleanArray +// Downcast Array to BooleanArray pub fn as_boolean_array(array: &dyn Array) -> Result<&BooleanArray> { Ok(downcast_value!(array, BooleanArray)) } -// Downcast ArrayRef to ListArray +// Downcast Array to ListArray pub fn as_list_array(array: &dyn Array) -> Result<&ListArray> { Ok(downcast_value!(array, ListArray)) } -// Downcast ArrayRef to DictionaryArray +// Downcast Array to DictionaryArray pub fn as_dictionary_array( array: &dyn Array, ) -> Result<&DictionaryArray> { Ok(downcast_value!(array, DictionaryArray, T)) } -// Downcast 
ArrayRef to GenericBinaryArray +// Downcast Array to GenericBinaryArray pub fn as_generic_binary_array( array: &dyn Array, ) -> Result<&GenericBinaryArray> { Ok(downcast_value!(array, GenericBinaryArray, T)) } -// Downcast ArrayRef to GenericListArray +// Downcast Array to GenericListArray pub fn as_generic_list_array( array: &dyn Array, ) -> Result<&GenericListArray> { Ok(downcast_value!(array, GenericListArray, T)) } -// Downcast ArrayRef to LargeListArray +// Downcast Array to LargeListArray pub fn as_large_list_array(array: &dyn Array) -> Result<&LargeListArray> { Ok(downcast_value!(array, LargeListArray)) } -// Downcast ArrayRef to PrimitiveArray +// Downcast Array to PrimitiveArray pub fn as_primitive_array( array: &dyn Array, ) -> Result<&PrimitiveArray> { Ok(downcast_value!(array, PrimitiveArray, T)) } -// Downcast ArrayRef to MapArray +// Downcast Array to MapArray pub fn as_map_array(array: &dyn Array) -> Result<&MapArray> { Ok(downcast_value!(array, MapArray)) } -// Downcast ArrayRef to NullArray +// Downcast Array to NullArray pub fn as_null_array(array: &dyn Array) -> Result<&NullArray> { Ok(downcast_value!(array, NullArray)) } -// Downcast ArrayRef to NullArray +// Downcast Array to NullArray pub fn as_union_array(array: &dyn Array) -> Result<&UnionArray> { Ok(downcast_value!(array, UnionArray)) } -// Downcast ArrayRef to Time32SecondArray +// Downcast Array to Time32SecondArray pub fn as_time32_second_array(array: &dyn Array) -> Result<&Time32SecondArray> { Ok(downcast_value!(array, Time32SecondArray)) } -// Downcast ArrayRef to Time32MillisecondArray +// Downcast Array to Time32MillisecondArray pub fn as_time32_millisecond_array(array: &dyn Array) -> Result<&Time32MillisecondArray> { Ok(downcast_value!(array, Time32MillisecondArray)) } -// Downcast ArrayRef to Time64MicrosecondArray +// Downcast Array to Time64MicrosecondArray pub fn as_time64_microsecond_array(array: &dyn Array) -> Result<&Time64MicrosecondArray> { Ok(downcast_value!(array, 
Time64MicrosecondArray)) } -// Downcast ArrayRef to Time64NanosecondArray +// Downcast Array to Time64NanosecondArray pub fn as_time64_nanosecond_array(array: &dyn Array) -> Result<&Time64NanosecondArray> { Ok(downcast_value!(array, Time64NanosecondArray)) } -// Downcast ArrayRef to TimestampNanosecondArray +// Downcast Array to TimestampNanosecondArray pub fn as_timestamp_nanosecond_array( array: &dyn Array, ) -> Result<&TimestampNanosecondArray> { Ok(downcast_value!(array, TimestampNanosecondArray)) } -// Downcast ArrayRef to TimestampMillisecondArray +// Downcast Array to TimestampMillisecondArray pub fn as_timestamp_millisecond_array( array: &dyn Array, ) -> Result<&TimestampMillisecondArray> { Ok(downcast_value!(array, TimestampMillisecondArray)) } -// Downcast ArrayRef to TimestampMicrosecondArray +// Downcast Array to TimestampMicrosecondArray pub fn as_timestamp_microsecond_array( array: &dyn Array, ) -> Result<&TimestampMicrosecondArray> { Ok(downcast_value!(array, TimestampMicrosecondArray)) } -// Downcast ArrayRef to TimestampSecondArray +// Downcast Array to TimestampSecondArray pub fn as_timestamp_second_array(array: &dyn Array) -> Result<&TimestampSecondArray> { Ok(downcast_value!(array, TimestampSecondArray)) } -// Downcast ArrayRef to IntervalYearMonthArray +// Downcast Array to IntervalYearMonthArray pub fn as_interval_ym_array(array: &dyn Array) -> Result<&IntervalYearMonthArray> { Ok(downcast_value!(array, IntervalYearMonthArray)) } -// Downcast ArrayRef to IntervalDayTimeArray +// Downcast Array to IntervalDayTimeArray pub fn as_interval_dt_array(array: &dyn Array) -> Result<&IntervalDayTimeArray> { Ok(downcast_value!(array, IntervalDayTimeArray)) } -// Downcast ArrayRef to IntervalMonthDayNanoArray +// Downcast Array to IntervalMonthDayNanoArray pub fn as_interval_mdn_array(array: &dyn Array) -> Result<&IntervalMonthDayNanoArray> { Ok(downcast_value!(array, IntervalMonthDayNanoArray)) } -// Downcast ArrayRef to BinaryArray +// Downcast Array to 
DurationSecondArray +pub fn as_duration_second_array(array: &dyn Array) -> Result<&DurationSecondArray> { + Ok(downcast_value!(array, DurationSecondArray)) +} + +// Downcast Array to DurationMillisecondArray +pub fn as_duration_millisecond_array( + array: &dyn Array, +) -> Result<&DurationMillisecondArray> { + Ok(downcast_value!(array, DurationMillisecondArray)) +} + +// Downcast Array to DurationMicrosecondArray +pub fn as_duration_microsecond_array( + array: &dyn Array, +) -> Result<&DurationMicrosecondArray> { + Ok(downcast_value!(array, DurationMicrosecondArray)) +} + +// Downcast Array to DurationNanosecondArray +pub fn as_duration_nanosecond_array( + array: &dyn Array, +) -> Result<&DurationNanosecondArray> { + Ok(downcast_value!(array, DurationNanosecondArray)) +} + +// Downcast Array to BinaryArray pub fn as_binary_array(array: &dyn Array) -> Result<&BinaryArray> { Ok(downcast_value!(array, BinaryArray)) } -// Downcast ArrayRef to BinaryViewArray +// Downcast Array to BinaryViewArray pub fn as_binary_view_array(array: &dyn Array) -> Result<&BinaryViewArray> { Ok(downcast_value!(array, BinaryViewArray)) } -// Downcast ArrayRef to LargeBinaryArray +// Downcast Array to LargeBinaryArray pub fn as_large_binary_array(array: &dyn Array) -> Result<&LargeBinaryArray> { Ok(downcast_value!(array, LargeBinaryArray)) } -// Downcast ArrayRef to FixedSizeListArray +// Downcast Array to FixedSizeListArray pub fn as_fixed_size_list_array(array: &dyn Array) -> Result<&FixedSizeListArray> { Ok(downcast_value!(array, FixedSizeListArray)) } -// Downcast ArrayRef to FixedSizeListArray +// Downcast Array to FixedSizeListArray pub fn as_fixed_size_binary_array(array: &dyn Array) -> Result<&FixedSizeBinaryArray> { Ok(downcast_value!(array, FixedSizeBinaryArray)) } -// Downcast ArrayRef to GenericBinaryArray +// Downcast Array to GenericBinaryArray pub fn as_generic_string_array( array: &dyn Array, ) -> Result<&GenericStringArray> { diff --git a/datafusion/common/src/column.rs 
b/datafusion/common/src/column.rs index b3acaeee5a54c..78b45a1306167 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -262,7 +262,7 @@ impl Column { // If not due to USING columns then due to ambiguous column name return _schema_err!(SchemaError::AmbiguousReference { - field: Column::new_unqualified(&self.name), + field: Box::new(Column::new_unqualified(&self.name)), }) .map_err(|err| { let mut diagnostic = Diagnostic::new_error( diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 6618c6aeec28f..06e9d8925fc6d 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -19,6 +19,8 @@ use arrow_ipc::CompressionType; +#[cfg(feature = "parquet_encryption")] +use crate::encryption::{FileDecryptionProperties, FileEncryptionProperties}; use crate::error::_config_err; use crate::parsers::CompressionTypeVariant; use crate::utils::get_available_parallelism; @@ -29,12 +31,8 @@ use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; -#[cfg(feature = "parquet")] +#[cfg(feature = "parquet_encryption")] use hex; -#[cfg(feature = "parquet")] -use parquet::encryption::decrypt::FileDecryptionProperties; -#[cfg(feature = "parquet")] -use parquet::encryption::encrypt::FileEncryptionProperties; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used @@ -842,6 +840,10 @@ config_namespace! { /// Display format of explain. Default is "indent". /// When set to "tree", it will print the plan in a tree-rendered format. pub format: String, default = "indent".to_string() + + /// (format=tree only) Maximum total width of the rendered tree. + /// When set to 0, the tree will have no width limit. 
+ pub tree_maximum_render_width: usize, default = 240 } } @@ -912,7 +914,7 @@ impl<'a> TryInto> for &'a FormatOptions } /// A key value pair, with a corresponding description -#[derive(Debug)] +#[derive(Debug, Hash, PartialEq, Eq)] pub struct ConfigEntry { /// A unique string to identify this config value pub key: String, @@ -2148,7 +2150,7 @@ impl ConfigField for ConfigFileEncryptionProperties { } } -#[cfg(feature = "parquet")] +#[cfg(feature = "parquet_encryption")] impl From for FileEncryptionProperties { fn from(val: ConfigFileEncryptionProperties) -> Self { let mut fep = FileEncryptionProperties::builder( @@ -2194,7 +2196,7 @@ impl From for FileEncryptionProperties { } } -#[cfg(feature = "parquet")] +#[cfg(feature = "parquet_encryption")] impl From<&FileEncryptionProperties> for ConfigFileEncryptionProperties { fn from(f: &FileEncryptionProperties) -> Self { let (column_names_vec, column_keys_vec, column_metas_vec) = f.column_keys(); @@ -2308,7 +2310,7 @@ impl ConfigField for ConfigFileDecryptionProperties { } } -#[cfg(feature = "parquet")] +#[cfg(feature = "parquet_encryption")] impl From for FileDecryptionProperties { fn from(val: ConfigFileDecryptionProperties) -> Self { let mut column_names: Vec<&str> = Vec::new(); @@ -2342,7 +2344,7 @@ impl From for FileDecryptionProperties { } } -#[cfg(feature = "parquet")] +#[cfg(feature = "parquet_encryption")] impl From<&FileDecryptionProperties> for ConfigFileDecryptionProperties { fn from(f: &FileDecryptionProperties) -> Self { let (column_names_vec, column_keys_vec) = f.column_keys(); @@ -2688,7 +2690,7 @@ mod tests { ); } - #[cfg(feature = "parquet")] + #[cfg(feature = "parquet_encryption")] #[test] fn parquet_table_encryption() { use crate::config::{ diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 804e14bf72fb0..88303bbcd7d24 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -206,6 +206,25 @@ impl DFSchema { Ok(dfschema) } + /// 
Return the same schema, where all fields have a given qualifier. + pub fn with_field_specific_qualified_schema( + &self, + qualifiers: Vec>, + ) -> Result { + if qualifiers.len() != self.fields().len() { + return _plan_err!( + "Number of qualifiers must match number of fields. Expected {}, got {}", + self.fields().len(), + qualifiers.len() + ); + } + Ok(DFSchema { + inner: Arc::clone(&self.inner), + field_qualifiers: qualifiers, + functional_dependencies: self.functional_dependencies.clone(), + }) + } + /// Check if the schema have some fields with the same name pub fn check_names(&self) -> Result<()> { let mut qualified_names = BTreeSet::new(); @@ -229,7 +248,7 @@ impl DFSchema { for (qualifier, name) in qualified_names { if unqualified_names.contains(name) { return _schema_err!(SchemaError::AmbiguousReference { - field: Column::new(Some(qualifier.clone()), name) + field: Box::new(Column::new(Some(qualifier.clone()), name)) }); } } @@ -489,7 +508,7 @@ impl DFSchema { Ok((fields_without_qualifier[0].0, fields_without_qualifier[0].1)) } else { _schema_err!(SchemaError::AmbiguousReference { - field: Column::new_unqualified(name.to_string(),), + field: Box::new(Column::new_unqualified(name.to_string())) }) } } diff --git a/datafusion/common/src/encryption.rs b/datafusion/common/src/encryption.rs new file mode 100644 index 0000000000000..5d50d4a9efd37 --- /dev/null +++ b/datafusion/common/src/encryption.rs @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Support optional features for encryption in Parquet files. +//! This module provides types and functions related to encryption in Parquet files. + +#[cfg(feature = "parquet_encryption")] +pub use parquet::encryption::decrypt::FileDecryptionProperties; +#[cfg(feature = "parquet_encryption")] +pub use parquet::encryption::encrypt::FileEncryptionProperties; + +#[cfg(not(feature = "parquet_encryption"))] +pub struct FileDecryptionProperties; +#[cfg(not(feature = "parquet_encryption"))] +pub struct FileEncryptionProperties; + +#[cfg(feature = "parquet")] +use crate::config::ParquetEncryptionOptions; +pub use crate::config::{ConfigFileDecryptionProperties, ConfigFileEncryptionProperties}; +#[cfg(feature = "parquet")] +use parquet::file::properties::WriterPropertiesBuilder; + +#[cfg(feature = "parquet")] +pub fn add_crypto_to_writer_properties( + #[allow(unused)] crypto: &ParquetEncryptionOptions, + #[allow(unused_mut)] mut builder: WriterPropertiesBuilder, +) -> WriterPropertiesBuilder { + #[cfg(feature = "parquet_encryption")] + if let Some(file_encryption_properties) = &crypto.file_encryption { + builder = builder + .with_file_encryption_properties(file_encryption_properties.clone().into()); + } + builder +} + +#[cfg(feature = "parquet_encryption")] +pub fn map_encryption_to_config_encryption( + encryption: Option<&FileEncryptionProperties>, +) -> Option { + encryption.map(|fe| fe.into()) +} + +#[cfg(not(feature = "parquet_encryption"))] +pub fn map_encryption_to_config_encryption( + _encryption: Option<&FileEncryptionProperties>, +) -> Option { 
+ None +} + +#[cfg(feature = "parquet_encryption")] +pub fn map_config_decryption_to_decryption( + decryption: Option<&ConfigFileDecryptionProperties>, +) -> Option { + decryption.map(|fd| fd.clone().into()) +} + +#[cfg(not(feature = "parquet_encryption"))] +pub fn map_config_decryption_to_decryption( + _decryption: Option<&ConfigFileDecryptionProperties>, +) -> Option { + None +} diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index b4a537fdce7ee..88029ea474fde 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -53,22 +53,22 @@ pub enum DataFusionError { /// Error returned by arrow. /// /// 2nd argument is for optional backtrace - ArrowError(ArrowError, Option), + ArrowError(Box, Option), /// Error when reading / writing Parquet data. #[cfg(feature = "parquet")] - ParquetError(ParquetError), + ParquetError(Box), /// Error when reading Avro data. #[cfg(feature = "avro")] AvroError(Box), /// Error when reading / writing to / from an object_store (e.g. S3 or LocalFile) #[cfg(feature = "object_store")] - ObjectStore(object_store::Error), + ObjectStore(Box), /// Error when an I/O operation fails IoError(io::Error), /// Error when SQL is syntactically incorrect. /// /// 2nd argument is for optional backtrace - SQL(ParserError, Option), + SQL(Box, Option), /// Error when a feature is not yet implemented. /// /// These errors are sometimes returned for features that are still in @@ -107,7 +107,7 @@ pub enum DataFusionError { /// /// 2nd argument is for optional backtrace /// Boxing the optional backtrace to prevent - SchemaError(SchemaError, Box>), + SchemaError(Box, Box>), /// Error during execution of the query. /// /// This error is returned when an error happens during execution due to a @@ -118,7 +118,7 @@ pub enum DataFusionError { /// [`JoinError`] during execution of the query. /// /// This error can't occur for unjoined tasks, such as execution shutdown. 
- ExecutionJoin(JoinError), + ExecutionJoin(Box), /// Error when resources (such as memory of scratch disk space) are exhausted. /// /// This error is thrown when a consumer cannot acquire additional memory @@ -164,7 +164,7 @@ macro_rules! context { #[derive(Debug)] pub enum SchemaError { /// Schema contains a (possibly) qualified and unqualified field with same unqualified name - AmbiguousReference { field: Column }, + AmbiguousReference { field: Box }, /// Schema contains duplicate qualified field name DuplicateQualifiedField { qualifier: Box, @@ -276,14 +276,14 @@ impl From for DataFusionError { impl From for DataFusionError { fn from(e: ArrowError) -> Self { - DataFusionError::ArrowError(e, None) + DataFusionError::ArrowError(Box::new(e), None) } } impl From for ArrowError { fn from(e: DataFusionError) -> Self { match e { - DataFusionError::ArrowError(e, _) => e, + DataFusionError::ArrowError(e, _) => *e, DataFusionError::External(e) => ArrowError::ExternalError(e), other => ArrowError::ExternalError(Box::new(other)), } @@ -304,7 +304,7 @@ impl From<&Arc> for DataFusionError { #[cfg(feature = "parquet")] impl From for DataFusionError { fn from(e: ParquetError) -> Self { - DataFusionError::ParquetError(e) + DataFusionError::ParquetError(Box::new(e)) } } @@ -318,20 +318,20 @@ impl From for DataFusionError { #[cfg(feature = "object_store")] impl From for DataFusionError { fn from(e: object_store::Error) -> Self { - DataFusionError::ObjectStore(e) + DataFusionError::ObjectStore(Box::new(e)) } } #[cfg(feature = "object_store")] impl From for DataFusionError { fn from(e: object_store::path::Error) -> Self { - DataFusionError::ObjectStore(e.into()) + DataFusionError::ObjectStore(Box::new(e.into())) } } impl From for DataFusionError { fn from(e: ParserError) -> Self { - DataFusionError::SQL(e, None) + DataFusionError::SQL(Box::new(e), None) } } @@ -361,22 +361,22 @@ impl Display for DataFusionError { impl Error for DataFusionError { fn source(&self) -> Option<&(dyn 
Error + 'static)> { match self { - DataFusionError::ArrowError(e, _) => Some(e), + DataFusionError::ArrowError(e, _) => Some(e.as_ref()), #[cfg(feature = "parquet")] - DataFusionError::ParquetError(e) => Some(e), + DataFusionError::ParquetError(e) => Some(e.as_ref()), #[cfg(feature = "avro")] - DataFusionError::AvroError(e) => Some(e), + DataFusionError::AvroError(e) => Some(e.as_ref()), #[cfg(feature = "object_store")] - DataFusionError::ObjectStore(e) => Some(e), + DataFusionError::ObjectStore(e) => Some(e.as_ref()), DataFusionError::IoError(e) => Some(e), - DataFusionError::SQL(e, _) => Some(e), + DataFusionError::SQL(e, _) => Some(e.as_ref()), DataFusionError::NotImplemented(_) => None, DataFusionError::Internal(_) => None, DataFusionError::Configuration(_) => None, DataFusionError::Plan(_) => None, - DataFusionError::SchemaError(e, _) => Some(e), + DataFusionError::SchemaError(e, _) => Some(e.as_ref()), DataFusionError::Execution(_) => None, - DataFusionError::ExecutionJoin(e) => Some(e), + DataFusionError::ExecutionJoin(e) => Some(e.as_ref()), DataFusionError::ResourcesExhausted(_) => None, DataFusionError::External(e) => Some(e.as_ref()), DataFusionError::Context(_, e) => Some(e.as_ref()), @@ -542,8 +542,9 @@ impl DataFusionError { DataFusionError::Configuration(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::NotImplemented(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::Internal(ref desc) => Cow::Owned(format!( - "{desc}.\nThis was likely caused by a bug in DataFusion's \ - code and we would welcome that you file an bug report in our issue tracker" + "{desc}.\nThis issue was likely caused by a bug in DataFusion's code. 
\ + Please help us to resolve this by filing a bug report in our issue tracker: \ + https://github.com/apache/datafusion/issues" )), DataFusionError::Plan(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::SchemaError(ref desc, ref backtrace) => { @@ -828,7 +829,7 @@ make_error!(resources_err, resources_datafusion_err, ResourcesExhausted); #[macro_export] macro_rules! sql_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = DataFusionError::SQL($ERR, Some(DataFusionError::get_back_trace())); + let err = DataFusionError::SQL(Box::new($ERR), Some(DataFusionError::get_back_trace())); $( let err = err.with_diagnostic($DIAG); )? @@ -852,7 +853,7 @@ macro_rules! sql_err { #[macro_export] macro_rules! arrow_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ - let err = DataFusionError::ArrowError($ERR, Some(DataFusionError::get_back_trace())); + let err = DataFusionError::ArrowError(Box::new($ERR), Some(DataFusionError::get_back_trace())); $( let err = err.with_diagnostic($DIAG); )? @@ -878,7 +879,7 @@ macro_rules! arrow_err { macro_rules! schema_datafusion_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{ let err = $crate::error::DataFusionError::SchemaError( - $ERR, + Box::new($ERR), Box::new(Some($crate::error::DataFusionError::get_back_trace())), ); $( @@ -893,7 +894,7 @@ macro_rules! schema_datafusion_err { macro_rules! schema_err { ($ERR:expr $(; diagnostic = $DIAG:expr)?) 
=> {{ let err = $crate::error::DataFusionError::SchemaError( - $ERR, + Box::new($ERR), Box::new(Some($crate::error::DataFusionError::get_back_trace())), ); $( @@ -951,11 +952,21 @@ pub fn add_possible_columns_to_diag( #[cfg(test)] mod test { + use super::*; + + use std::mem::size_of; use std::sync::Arc; - use crate::error::{DataFusionError, GenericError}; use arrow::error::ArrowError; + #[test] + fn test_error_size() { + // Since Errors influence the size of Result which influence the size of the stack + // please don't allow this to grow larger + assert_eq!(size_of::(), 40); + assert_eq!(size_of::(), 40); + } + #[test] fn datafusion_error_to_arrow() { let res = return_arrow_error().unwrap_err(); @@ -1020,8 +1031,8 @@ mod test { do_root_test( DataFusionError::ArrowError( - ArrowError::ExternalError(Box::new(DataFusionError::ResourcesExhausted( - "foo".to_string(), + Box::new(ArrowError::ExternalError(Box::new( + DataFusionError::ResourcesExhausted("foo".to_string()), ))), None, ), @@ -1044,9 +1055,11 @@ mod test { do_root_test( DataFusionError::ArrowError( - ArrowError::ExternalError(Box::new(ArrowError::ExternalError(Box::new( - DataFusionError::ResourcesExhausted("foo".to_string()), - )))), + Box::new(ArrowError::ExternalError(Box::new( + ArrowError::ExternalError(Box::new( + DataFusionError::ResourcesExhausted("foo".to_string()), + )), + ))), None, ), DataFusionError::ResourcesExhausted("foo".to_string()), @@ -1120,7 +1133,7 @@ mod test { ); // assert wrapping other Error - let generic_error: GenericError = Box::new(std::io::Error::other("io error")); + let generic_error: GenericError = Box::new(io::Error::other("io error")); let datafusion_error: DataFusionError = generic_error.into(); println!("{}", datafusion_error.strip_backtrace()); assert_eq!( @@ -1131,7 +1144,7 @@ mod test { #[test] fn external_error_no_recursive() { - let generic_error_1: GenericError = Box::new(std::io::Error::other("io error")); + let generic_error_1: GenericError = 
Box::new(io::Error::other("io error")); let external_error_1: DataFusionError = generic_error_1.into(); let generic_error_2: GenericError = Box::new(external_error_1); let external_error_2: DataFusionError = generic_error_2.into(); @@ -1151,7 +1164,7 @@ mod test { /// Model what happens when using arrow kernels in DataFusion /// code: need to turn an ArrowError into a DataFusionError - fn return_datafusion_error() -> crate::error::Result<()> { + fn return_datafusion_error() -> Result<()> { // Expect the '?' to work Err(ArrowError::SchemaError("bar".to_string()).into()) } diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 60f0f4abb0c05..cde0ea1299795 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -27,6 +27,7 @@ use crate::{ use arrow::datatypes::Schema; // TODO: handle once deprecated +use crate::encryption::add_crypto_to_writer_properties; #[allow(deprecated)] use parquet::{ arrow::ARROW_SCHEMA_META_KEY, @@ -100,11 +101,7 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { let mut builder = global.into_writer_properties_builder()?; - if let Some(file_encryption_properties) = &crypto.file_encryption { - builder = builder.with_file_encryption_properties( - file_encryption_properties.clone().into(), - ); - } + builder = add_crypto_to_writer_properties(crypto, builder); // check that the arrow schema is present in the kv_metadata, if configured to do so if !global.skip_arrow_metadata @@ -456,12 +453,10 @@ mod tests { }; use std::collections::HashMap; - use crate::config::{ - ConfigFileEncryptionProperties, ParquetColumnOptions, ParquetEncryptionOptions, - ParquetOptions, - }; - use super::*; + use crate::config::{ParquetColumnOptions, ParquetEncryptionOptions, ParquetOptions}; + #[cfg(feature = "parquet_encryption")] + use crate::encryption::map_encryption_to_config_encryption; const COL_NAME: &str 
= "configured"; @@ -590,8 +585,10 @@ mod tests { HashMap::from([(COL_NAME.into(), configured_col_props)]) }; - let fep: Option = - props.file_encryption_properties().map(|fe| fe.into()); + #[cfg(feature = "parquet_encryption")] + let fep = map_encryption_to_config_encryption(props.file_encryption_properties()); + #[cfg(not(feature = "parquet_encryption"))] + let fep = None; #[allow(deprecated)] // max_statistics_size TableParquetOptions { diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index e78d42257b9cb..4b18351f708b7 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -184,6 +184,26 @@ fn hash_array( } } +/// Helper function to update hash for a dictionary key if the value is valid +#[cfg(not(feature = "force_hash_collisions"))] +#[inline] +fn update_hash_for_dict_key( + hash: &mut u64, + dict_hashes: &[u64], + dict_values: &dyn Array, + idx: usize, + multi_col: bool, +) { + if dict_values.is_valid(idx) { + if multi_col { + *hash = combine_hashes(dict_hashes[idx], *hash); + } else { + *hash = dict_hashes[idx]; + } + } + // no update for invalid dictionary value +} + /// Hash the values in a dictionary array #[cfg(not(feature = "force_hash_collisions"))] fn hash_dictionary( @@ -195,23 +215,23 @@ fn hash_dictionary( // Hash each dictionary value once, and then use that computed // hash for each key value to avoid a potentially expensive // redundant hashing for large dictionary elements (e.g. 
strings) - let values = Arc::clone(array.values()); - let mut dict_hashes = vec![0; values.len()]; - create_hashes(&[values], random_state, &mut dict_hashes)?; + let dict_values = Arc::clone(array.values()); + let mut dict_hashes = vec![0; dict_values.len()]; + create_hashes(&[dict_values], random_state, &mut dict_hashes)?; // combine hash for each index in values - if multi_col { - for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) { - if let Some(key) = key { - *hash = combine_hashes(dict_hashes[key.as_usize()], *hash) - } // no update for Null, consistent with other hashes - } - } else { - for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) { - if let Some(key) = key { - *hash = dict_hashes[key.as_usize()] - } // no update for Null, consistent with other hashes - } + let dict_values = array.values(); + for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) { + if let Some(key) = key { + let idx = key.as_usize(); + update_hash_for_dict_key( + hash, + &dict_hashes, + dict_values.as_ref(), + idx, + multi_col, + ); + } // no update for Null key } Ok(()) } diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 3ea7321ef3b4b..3a558fa867894 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -41,6 +41,7 @@ pub mod config; pub mod cse; pub mod diagnostic; pub mod display; +pub mod encryption; pub mod error; pub mod file_options; pub mod format; @@ -139,10 +140,12 @@ pub mod __private { impl DowncastArrayHelper for T { fn downcast_array_helper(&self) -> Result<&U> { self.as_any().downcast_ref().ok_or_else(|| { + let actual_type = self.data_type(); + let desired_type_name = type_name::(); _internal_datafusion_err!( "could not cast array of type {} to {}", - self.data_type(), - type_name::() + actual_type, + desired_type_name ) }) } diff --git a/datafusion/common/src/scalar/cache.rs b/datafusion/common/src/scalar/cache.rs new file mode 100644 index 
0000000000000..f1476a518774b --- /dev/null +++ b/datafusion/common/src/scalar/cache.rs @@ -0,0 +1,215 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Array caching utilities for scalar values + +use std::iter::repeat_n; +use std::sync::{Arc, LazyLock, Mutex}; + +use arrow::array::{new_null_array, Array, ArrayRef, PrimitiveArray}; +use arrow::datatypes::{ + ArrowDictionaryKeyType, DataType, Int16Type, Int32Type, Int64Type, Int8Type, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; + +/// Maximum number of rows to cache to be conservative on memory usage +const MAX_CACHE_SIZE: usize = 1024 * 1024; + +/// Cache for dictionary key arrays to avoid repeated allocations +/// when the same size is used frequently. +/// +/// Similar to PartitionColumnProjector's ZeroBufferGenerators, this cache +/// stores key arrays for different dictionary key types. The cache is +/// limited to 1 entry per type (the last size used) to prevent memory leaks +/// for extremely large array requests. 
+#[derive(Debug)] +struct KeyArrayCache { + cache: Option<(usize, bool, PrimitiveArray)>, // (num_rows, is_null, key_array) +} + +impl Default for KeyArrayCache { + fn default() -> Self { + Self { cache: None } + } +} + +impl KeyArrayCache { + /// Get or create a cached key array for the given number of rows and null status + fn get_or_create(&mut self, num_rows: usize, is_null: bool) -> PrimitiveArray { + // Check cache size limit to prevent memory leaks + if num_rows > MAX_CACHE_SIZE { + // For very large arrays, don't cache them - just create and return + return self.create_key_array(num_rows, is_null); + } + + match &self.cache { + Some((cached_num_rows, cached_is_null, cached_array)) + if *cached_num_rows == num_rows && *cached_is_null == is_null => + { + // Cache hit: reuse existing array if same size and null status + cached_array.clone() + } + _ => { + // Cache miss: create new array and cache it + let key_array = self.create_key_array(num_rows, is_null); + self.cache = Some((num_rows, is_null, key_array.clone())); + key_array + } + } + } + + /// Create a new key array with the specified number of rows and null status + fn create_key_array(&self, num_rows: usize, is_null: bool) -> PrimitiveArray { + let key_array: PrimitiveArray = repeat_n( + if is_null { + None + } else { + Some(K::default_value()) + }, + num_rows, + ) + .collect(); + key_array + } +} + +/// Cache for null arrays to avoid repeated allocations +/// when the same size is used frequently. 
+#[derive(Debug, Default)] +struct NullArrayCache { + cache: Option<(usize, ArrayRef)>, // (num_rows, null_array) +} + +impl NullArrayCache { + /// Get or create a cached null array for the given number of rows + fn get_or_create(&mut self, num_rows: usize) -> ArrayRef { + // Check cache size limit to prevent memory leaks + if num_rows > MAX_CACHE_SIZE { + // For very large arrays, don't cache them - just create and return + return new_null_array(&DataType::Null, num_rows); + } + + match &self.cache { + Some((cached_num_rows, cached_array)) if *cached_num_rows == num_rows => { + // Cache hit: reuse existing array if same size + Arc::clone(cached_array) + } + _ => { + // Cache miss: create new array and cache it + let null_array = new_null_array(&DataType::Null, num_rows); + self.cache = Some((num_rows, Arc::clone(&null_array))); + null_array + } + } + } +} + +/// Global cache for dictionary key arrays and null arrays +#[derive(Debug, Default)] +struct ArrayCaches { + cache_i8: KeyArrayCache, + cache_i16: KeyArrayCache, + cache_i32: KeyArrayCache, + cache_i64: KeyArrayCache, + cache_u8: KeyArrayCache, + cache_u16: KeyArrayCache, + cache_u32: KeyArrayCache, + cache_u64: KeyArrayCache, + null_cache: NullArrayCache, +} + +static ARRAY_CACHES: LazyLock> = + LazyLock::new(|| Mutex::new(ArrayCaches::default())); + +/// Get the global cache for arrays +fn get_array_caches() -> &'static Mutex { + &ARRAY_CACHES +} + +/// Get or create a cached null array for the given number of rows +pub(crate) fn get_or_create_cached_null_array(num_rows: usize) -> ArrayRef { + let cache = get_array_caches(); + let mut caches = cache.lock().unwrap(); + caches.null_cache.get_or_create(num_rows) +} + +/// Get or create a cached key array for a specific key type +pub(crate) fn get_or_create_cached_key_array( + num_rows: usize, + is_null: bool, +) -> PrimitiveArray { + let cache = get_array_caches(); + let mut caches = cache.lock().unwrap(); + + // Use the DATA_TYPE to dispatch to the correct 
cache, similar to original implementation + match K::DATA_TYPE { + DataType::Int8 => { + let array = caches.cache_i8.get_or_create(num_rows, is_null); + // Convert using ArrayData to avoid unsafe transmute + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::Int16 => { + let array = caches.cache_i16.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::Int32 => { + let array = caches.cache_i32.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::Int64 => { + let array = caches.cache_i64.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::UInt8 => { + let array = caches.cache_u8.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::UInt16 => { + let array = caches.cache_u16.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::UInt32 => { + let array = caches.cache_u32.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + DataType::UInt64 => { + let array = caches.cache_u64.get_or_create(num_rows, is_null); + let array_data = array.to_data(); + PrimitiveArray::::from(array_data) + } + _ => { + // Fallback for unsupported types - create array directly without caching + let key_array: PrimitiveArray = repeat_n( + if is_null { + None + } else { + Some(K::default_value()) + }, + num_rows, + ) + .collect(); + key_array + } + } +} diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 6716cb5f9be89..1ced4ab825dd3 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -17,6 +17,7 @@ //! 
[`ScalarValue`]: stores single values +mod cache; mod consts; mod struct_builder; @@ -33,8 +34,19 @@ use std::str::FromStr; use std::sync::Arc; use crate::cast::{ - as_decimal128_array, as_decimal256_array, as_dictionary_array, - as_fixed_size_binary_array, as_fixed_size_list_array, + as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array, + as_date64_array, as_decimal128_array, as_decimal256_array, as_dictionary_array, + as_duration_microsecond_array, as_duration_millisecond_array, + as_duration_nanosecond_array, as_duration_second_array, as_fixed_size_binary_array, + as_fixed_size_list_array, as_float16_array, as_float32_array, as_float64_array, + as_int16_array, as_int32_array, as_int64_array, as_int8_array, as_interval_dt_array, + as_interval_mdn_array, as_interval_ym_array, as_large_binary_array, + as_large_list_array, as_large_string_array, as_string_array, as_string_view_array, + as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, + as_time64_nanosecond_array, as_timestamp_microsecond_array, + as_timestamp_millisecond_array, as_timestamp_nanosecond_array, + as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array, + as_uint8_array, as_union_array, }; use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::format::DEFAULT_CAST_OPTIONS; @@ -42,23 +54,36 @@ use crate::hash_utils::create_hashes; use crate::utils::SingleRowListArrayBuilder; use crate::{_internal_datafusion_err, arrow_datafusion_err}; use arrow::array::{ - types::{IntervalDayTime, IntervalMonthDayNano}, - *, + new_empty_array, new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp, + ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, + Date64Array, Decimal128Array, Decimal256Array, DictionaryArray, + DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, + DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, 
Float16Array, + Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array, + Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, + MutableArrayData, PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray, + Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, + Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, + UInt64Array, UInt8Array, UnionArray, }; use arrow::buffer::ScalarBuffer; -use arrow::compute::kernels::{ - cast::{cast_with_options, CastOptions}, - numeric::*, +use arrow::compute::kernels::cast::{cast_with_options, CastOptions}; +use arrow::compute::kernels::numeric::{ + add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping, }; use arrow::datatypes::{ - i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, - Date32Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, Int8Type, - IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, - TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, + i256, validate_decimal_precision_and_scale, ArrowDictionaryKeyType, ArrowNativeType, + ArrowTimestampType, DataType, Date32Type, Decimal128Type, Decimal256Type, Field, + Float32Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTime, + IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit, + IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION, }; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; +use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array}; use chrono::{Duration, 
NaiveDate}; use half::f16; pub use struct_builder::ScalarStructBuilder; @@ -192,6 +217,8 @@ pub use struct_builder::ScalarStructBuilder; /// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for /// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375) /// for the definitive reference. +/// +/// [`NullArray`]: arrow::array::NullArray #[derive(Clone)] pub enum ScalarValue { /// represents `DataType::Null` (castable to/from any other type) @@ -840,15 +867,9 @@ fn dict_from_scalar( let values_array = value.to_array_of_size(1)?; // Create a key array with `size` elements, each of 0 - let key_array: PrimitiveArray = repeat_n( - if value.is_null() { - None - } else { - Some(K::default_value()) - }, - size, - ) - .collect(); + // Use cache to avoid repeated allocations for the same size + let key_array: PrimitiveArray = + get_or_create_cached_key_array::(size, value.is_null()); // create a new DictionaryArray // @@ -895,17 +916,8 @@ fn dict_from_values( } macro_rules! typed_cast_tz { - ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{ - use std::any::type_name; - let array = $array - .as_any() - .downcast_ref::<$ARRAYTYPE>() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "could not cast value to {}", - type_name::<$ARRAYTYPE>() - )) - })?; + ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{ + let array = $array_cast($array)?; Ok::(ScalarValue::$SCALAR( match array.is_null($index) { true => None, @@ -917,17 +929,8 @@ macro_rules! typed_cast_tz { } macro_rules! 
typed_cast { - ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{ - use std::any::type_name; - let array = $array - .as_any() - .downcast_ref::<$ARRAYTYPE>() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "could not cast value to {}", - type_name::<$ARRAYTYPE>() - )) - })?; + ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{ + let array = $array_cast($array)?; Ok::(ScalarValue::$SCALAR( match array.is_null($index) { true => None, @@ -964,17 +967,8 @@ macro_rules! build_timestamp_array_from_option { } macro_rules! eq_array_primitive { - ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{ - use std::any::type_name; - let array = $array - .as_any() - .downcast_ref::<$ARRAYTYPE>() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "could not cast value to {}", - type_name::<$ARRAYTYPE>() - )) - })?; + ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{ + let array = $array_cast($array)?; let is_valid = array.is_valid($index); Ok::(match $VALUE { Some(val) => is_valid && &array.value($index) == val, @@ -1371,6 +1365,144 @@ impl ScalarValue { }) } + /// Returns a default value for the given `DataType`. + /// + /// This function is useful when you need to initialize a column with + /// non-null values in a DataFrame or when you need a "zero" value + /// for a specific data type. 
+ /// + /// # Default Values + /// + /// - **Numeric types**: Returns zero (via [`new_zero`]) + /// - **String types**: Returns empty string (`""`) + /// - **Binary types**: Returns empty byte array + /// - **Temporal types**: Returns zero/epoch value + /// - **List types**: Returns empty list + /// - **Struct types**: Returns struct with all fields set to their defaults + /// - **Dictionary types**: Returns dictionary with default value + /// - **Map types**: Returns empty map + /// - **Union types**: Returns first variant with default value + /// + /// # Errors + /// + /// Returns an error for data types that don't have a clear default value + /// or are not yet supported (e.g., `RunEndEncoded`). + /// + /// [`new_zero`]: Self::new_zero + pub fn new_default(datatype: &DataType) -> Result { + match datatype { + // Null type + DataType::Null => Ok(ScalarValue::Null), + + // Numeric types + DataType::Boolean + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) + | DataType::Timestamp(_, _) + | DataType::Time32(_) + | DataType::Time64(_) + | DataType::Interval(_) + | DataType::Duration(_) + | DataType::Date32 + | DataType::Date64 => ScalarValue::new_zero(datatype), + + // String types + DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))), + DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))), + DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))), + + // Binary types + DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))), + DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))), + DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))), + + // Fixed-size binary + DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary( + *size, + Some(vec![0; *size 
as usize]), + )), + + // List types + DataType::List(field) => { + let list = + ScalarValue::new_list(&[], field.data_type(), field.is_nullable()); + Ok(ScalarValue::List(list)) + } + DataType::FixedSizeList(field, _size) => { + let empty_arr = new_empty_array(field.data_type()); + let values = Arc::new( + SingleRowListArrayBuilder::new(empty_arr) + .with_nullable(field.is_nullable()) + .build_fixed_size_list_array(0), + ); + Ok(ScalarValue::FixedSizeList(values)) + } + DataType::LargeList(field) => { + let list = ScalarValue::new_large_list(&[], field.data_type()); + Ok(ScalarValue::LargeList(list)) + } + + // Struct types + DataType::Struct(fields) => { + let values = fields + .iter() + .map(|f| ScalarValue::new_default(f.data_type())) + .collect::>>()?; + Ok(ScalarValue::Struct(Arc::new(StructArray::new( + fields.clone(), + values + .into_iter() + .map(|v| v.to_array()) + .collect::>()?, + None, + )))) + } + + // Dictionary types + DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary( + key_type.clone(), + Box::new(ScalarValue::new_default(value_type)?), + )), + + // Map types + DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from( + ArrayData::new_empty(field.data_type()), + )))), + + // Union types - return first variant with default value + DataType::Union(fields, mode) => { + if let Some((type_id, field)) = fields.iter().next() { + let default_value = ScalarValue::new_default(field.data_type())?; + Ok(ScalarValue::Union( + Some((type_id, Box::new(default_value))), + fields.clone(), + *mode, + )) + } else { + _internal_err!("Union type must have at least one field") + } + } + + // Unsupported types for now + _ => { + _not_impl_err!( + "Default value for data_type \"{datatype:?}\" is not implemented yet" + ) + } + } + } + /// Create an one value in the given type. 
pub fn new_one(datatype: &DataType) -> Result { Ok(match datatype { @@ -1385,6 +1517,34 @@ impl ScalarValue { DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))), DataType::Float32 => ScalarValue::Float32(Some(1.0)), DataType::Float64 => ScalarValue::Float64(Some(1.0)), + DataType::Decimal128(precision, scale) => { + validate_decimal_precision_and_scale::( + *precision, *scale, + )?; + if *scale < 0 { + return _internal_err!("Negative scale is not supported"); + } + match i128::from(10).checked_pow(*scale as u32) { + Some(value) => { + ScalarValue::Decimal128(Some(value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } + DataType::Decimal256(precision, scale) => { + validate_decimal_precision_and_scale::( + *precision, *scale, + )?; + if *scale < 0 { + return _internal_err!("Negative scale is not supported"); + } + match i256::from(10).checked_pow(*scale as u32) { + Some(value) => { + ScalarValue::Decimal256(Some(value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } _ => { return _not_impl_err!( "Can't create an one scalar from data_type \"{datatype:?}\"" @@ -1403,6 +1563,34 @@ impl ScalarValue { DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))), DataType::Float32 => ScalarValue::Float32(Some(-1.0)), DataType::Float64 => ScalarValue::Float64(Some(-1.0)), + DataType::Decimal128(precision, scale) => { + validate_decimal_precision_and_scale::( + *precision, *scale, + )?; + if *scale < 0 { + return _internal_err!("Negative scale is not supported"); + } + match i128::from(10).checked_pow(*scale as u32) { + Some(value) => { + ScalarValue::Decimal128(Some(-value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } + DataType::Decimal256(precision, scale) => { + validate_decimal_precision_and_scale::( + *precision, *scale, + )?; + if *scale < 0 { + return _internal_err!("Negative scale is not 
supported"); + } + match i256::from(10).checked_pow(*scale as u32) { + Some(value) => { + ScalarValue::Decimal256(Some(-value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } _ => { return _not_impl_err!( "Can't create a negative one scalar from data_type \"{datatype:?}\"" @@ -1424,6 +1612,38 @@ impl ScalarValue { DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))), DataType::Float32 => ScalarValue::Float32(Some(10.0)), DataType::Float64 => ScalarValue::Float64(Some(10.0)), + DataType::Decimal128(precision, scale) => { + if let Err(err) = validate_decimal_precision_and_scale::( + *precision, *scale, + ) { + return _internal_err!("Invalid precision and scale {err}"); + } + if *scale <= 0 { + return _internal_err!("Negative scale is not supported"); + } + match i128::from(10).checked_pow((*scale + 1) as u32) { + Some(value) => { + ScalarValue::Decimal128(Some(value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } + DataType::Decimal256(precision, scale) => { + if let Err(err) = validate_decimal_precision_and_scale::( + *precision, *scale, + ) { + return _internal_err!("Invalid precision and scale {err}"); + } + if *scale <= 0 { + return _internal_err!("Negative scale is not supported"); + } + match i256::from(10).checked_pow((*scale + 1) as u32) { + Some(value) => { + ScalarValue::Decimal256(Some(value), *precision, *scale) + } + None => return _internal_err!("Unsupported scale {scale}"), + } + } _ => { return _not_impl_err!( "Can't create a ten scalar from data_type \"{datatype:?}\"" @@ -1793,6 +2013,26 @@ impl ScalarValue { (Self::Float64(Some(l)), Self::Float64(Some(r))) => { Some((l - r).abs().round() as _) } + ( + Self::Decimal128(Some(l), lprecision, lscale), + Self::Decimal128(Some(r), rprecision, rscale), + ) => { + if lprecision == rprecision && lscale == rscale { + l.checked_sub(*r)?.checked_abs()?.to_usize() + } else { + None + } + } + ( + 
Self::Decimal256(Some(l), lprecision, lscale), + Self::Decimal256(Some(r), rprecision, rscale), + ) => { + if lprecision == rprecision && lscale == rscale { + l.checked_sub(*r)?.checked_abs()?.to_usize() + } else { + None + } + } _ => None, } } @@ -2506,18 +2746,33 @@ impl ScalarValue { } }, ScalarValue::List(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } ScalarValue::LargeList(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } ScalarValue::FixedSizeList(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } ScalarValue::Struct(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } ScalarValue::Map(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } ScalarValue::Date32(e) => { @@ -2643,7 +2898,7 @@ impl ScalarValue { value_offsets, child_arrays, ) - .map_err(|e| DataFusionError::ArrowError(e, None))?; + .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?; Arc::new(ar) } None => { @@ -2665,7 +2920,7 @@ impl ScalarValue { _ => unreachable!("Invalid dictionary keys type: {:?}", key_type), } } - ScalarValue::Null => new_null_array(&DataType::Null, size), + ScalarValue::Null => get_or_create_cached_null_array(size), }) } @@ -2818,30 +3073,32 @@ impl ScalarValue { array, index, *precision, *scale, )? 
} - DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)?, - DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)?, - DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?, - DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?, - DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)?, - DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?, - DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?, - DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?, - DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)?, - DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)?, - DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?, - DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?, - DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?, + DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?, + DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?, + DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?, + DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?, + DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?, + DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?, + DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?, + DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?, + DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?, + DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?, + DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?, + DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?, + DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?, DataType::LargeBinary => { - typed_cast!(array, index, LargeBinaryArray, LargeBinary)? 
+ typed_cast!(array, index, as_large_binary_array, LargeBinary)? } DataType::BinaryView => { - typed_cast!(array, index, BinaryViewArray, BinaryView)? + typed_cast!(array, index, as_binary_view_array, BinaryView)? } - DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?, + DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?, DataType::LargeUtf8 => { - typed_cast!(array, index, LargeStringArray, LargeUtf8)? + typed_cast!(array, index, as_large_string_array, LargeUtf8)? + } + DataType::Utf8View => { + typed_cast!(array, index, as_string_view_array, Utf8View)? } - DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?, DataType::List(field) => { let list_array = array.as_list::(); let nested_array = list_array.value(index); @@ -2851,7 +3108,7 @@ impl ScalarValue { .build_list_scalar() } DataType::LargeList(field) => { - let list_array = as_large_list_array(array); + let list_array = as_large_list_array(array)?; let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. SingleRowListArrayBuilder::new(nested_array) @@ -2868,45 +3125,45 @@ impl ScalarValue { .with_field(field) .build_fixed_size_list_scalar(list_size) } - DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?, - DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?, + DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?, + DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?, DataType::Time32(TimeUnit::Second) => { - typed_cast!(array, index, Time32SecondArray, Time32Second)? + typed_cast!(array, index, as_time32_second_array, Time32Second)? } DataType::Time32(TimeUnit::Millisecond) => { - typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)? + typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)? 
} DataType::Time64(TimeUnit::Microsecond) => { - typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)? + typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)? } DataType::Time64(TimeUnit::Nanosecond) => { - typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)? + typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)? } DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!( array, index, - TimestampSecondArray, + as_timestamp_second_array, TimestampSecond, tz_opt )?, DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!( array, index, - TimestampMillisecondArray, + as_timestamp_millisecond_array, TimestampMillisecond, tz_opt )?, DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!( array, index, - TimestampMicrosecondArray, + as_timestamp_microsecond_array, TimestampMicrosecond, tz_opt )?, DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!( array, index, - TimestampNanosecondArray, + as_timestamp_nanosecond_array, TimestampNanosecond, tz_opt )?, @@ -2952,36 +3209,42 @@ impl ScalarValue { ) } DataType::Interval(IntervalUnit::DayTime) => { - typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)? + typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)? } DataType::Interval(IntervalUnit::YearMonth) => { - typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)? + typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)? + } + DataType::Interval(IntervalUnit::MonthDayNano) => { + typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)? } - DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!( - array, - index, - IntervalMonthDayNanoArray, - IntervalMonthDayNano - )?, DataType::Duration(TimeUnit::Second) => { - typed_cast!(array, index, DurationSecondArray, DurationSecond)? 
- } - DataType::Duration(TimeUnit::Millisecond) => { - typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)? - } - DataType::Duration(TimeUnit::Microsecond) => { - typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)? - } - DataType::Duration(TimeUnit::Nanosecond) => { - typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)? + typed_cast!(array, index, as_duration_second_array, DurationSecond)? } + DataType::Duration(TimeUnit::Millisecond) => typed_cast!( + array, + index, + as_duration_millisecond_array, + DurationMillisecond + )?, + DataType::Duration(TimeUnit::Microsecond) => typed_cast!( + array, + index, + as_duration_microsecond_array, + DurationMicrosecond + )?, + DataType::Duration(TimeUnit::Nanosecond) => typed_cast!( + array, + index, + as_duration_nanosecond_array, + DurationNanosecond + )?, DataType::Map(_, _) => { let a = array.slice(index, 1); Self::Map(Arc::new(a.as_map().to_owned())) } DataType::Union(fields, mode) => { - let array = as_union_array(array); + let array = as_union_array(array)?; let ti = array.type_id(index); let index = array.value_offset(index); let value = ScalarValue::try_from_array(array.child(ti), index)?; @@ -3055,43 +3318,7 @@ impl ScalarValue { target_type: &DataType, cast_options: &CastOptions<'static>, ) -> Result { - let scalar_array = match (self, target_type) { - ( - ScalarValue::Float64(Some(float_ts)), - DataType::Timestamp(TimeUnit::Nanosecond, None), - ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64)) - .to_array()?, - ( - ScalarValue::Decimal128(Some(decimal_value), _, scale), - DataType::Timestamp(time_unit, None), - ) => { - let scale_factor = 10_i128.pow(*scale as u32); - let seconds = decimal_value / scale_factor; - let fraction = decimal_value % scale_factor; - - let timestamp_value = match time_unit { - TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)), - TimeUnit::Millisecond => { - let millis = seconds * 1_000 + 
(fraction * 1_000) / scale_factor; - ScalarValue::Int64(Some(millis as i64)) - } - TimeUnit::Microsecond => { - let micros = - seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor; - ScalarValue::Int64(Some(micros as i64)) - } - TimeUnit::Nanosecond => { - let nanos = seconds * 1_000_000_000 - + (fraction * 1_000_000_000) / scale_factor; - ScalarValue::Int64(Some(nanos as i64)) - } - }; - - timestamp_value.to_array()? - } - _ => self.to_array()?, - }; - + let scalar_array = self.to_array()?; let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?; ScalarValue::try_from_array(&cast_arr, 0) } @@ -3182,59 +3409,61 @@ impl ScalarValue { )? } ScalarValue::Boolean(val) => { - eq_array_primitive!(array, index, BooleanArray, val)? + eq_array_primitive!(array, index, as_boolean_array, val)? } ScalarValue::Float16(val) => { - eq_array_primitive!(array, index, Float16Array, val)? + eq_array_primitive!(array, index, as_float16_array, val)? } ScalarValue::Float32(val) => { - eq_array_primitive!(array, index, Float32Array, val)? + eq_array_primitive!(array, index, as_float32_array, val)? } ScalarValue::Float64(val) => { - eq_array_primitive!(array, index, Float64Array, val)? + eq_array_primitive!(array, index, as_float64_array, val)? + } + ScalarValue::Int8(val) => { + eq_array_primitive!(array, index, as_int8_array, val)? } - ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?, ScalarValue::Int16(val) => { - eq_array_primitive!(array, index, Int16Array, val)? + eq_array_primitive!(array, index, as_int16_array, val)? } ScalarValue::Int32(val) => { - eq_array_primitive!(array, index, Int32Array, val)? + eq_array_primitive!(array, index, as_int32_array, val)? } ScalarValue::Int64(val) => { - eq_array_primitive!(array, index, Int64Array, val)? + eq_array_primitive!(array, index, as_int64_array, val)? } ScalarValue::UInt8(val) => { - eq_array_primitive!(array, index, UInt8Array, val)? 
+ eq_array_primitive!(array, index, as_uint8_array, val)? } ScalarValue::UInt16(val) => { - eq_array_primitive!(array, index, UInt16Array, val)? + eq_array_primitive!(array, index, as_uint16_array, val)? } ScalarValue::UInt32(val) => { - eq_array_primitive!(array, index, UInt32Array, val)? + eq_array_primitive!(array, index, as_uint32_array, val)? } ScalarValue::UInt64(val) => { - eq_array_primitive!(array, index, UInt64Array, val)? + eq_array_primitive!(array, index, as_uint64_array, val)? } ScalarValue::Utf8(val) => { - eq_array_primitive!(array, index, StringArray, val)? + eq_array_primitive!(array, index, as_string_array, val)? } ScalarValue::Utf8View(val) => { - eq_array_primitive!(array, index, StringViewArray, val)? + eq_array_primitive!(array, index, as_string_view_array, val)? } ScalarValue::LargeUtf8(val) => { - eq_array_primitive!(array, index, LargeStringArray, val)? + eq_array_primitive!(array, index, as_large_string_array, val)? } ScalarValue::Binary(val) => { - eq_array_primitive!(array, index, BinaryArray, val)? + eq_array_primitive!(array, index, as_binary_array, val)? } ScalarValue::BinaryView(val) => { - eq_array_primitive!(array, index, BinaryViewArray, val)? + eq_array_primitive!(array, index, as_binary_view_array, val)? } ScalarValue::FixedSizeBinary(_, val) => { - eq_array_primitive!(array, index, FixedSizeBinaryArray, val)? + eq_array_primitive!(array, index, as_fixed_size_binary_array, val)? } ScalarValue::LargeBinary(val) => { - eq_array_primitive!(array, index, LargeBinaryArray, val)? + eq_array_primitive!(array, index, as_large_binary_array, val)? } ScalarValue::List(arr) => { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) @@ -3252,58 +3481,58 @@ impl ScalarValue { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) } ScalarValue::Date32(val) => { - eq_array_primitive!(array, index, Date32Array, val)? + eq_array_primitive!(array, index, as_date32_array, val)? 
} ScalarValue::Date64(val) => { - eq_array_primitive!(array, index, Date64Array, val)? + eq_array_primitive!(array, index, as_date64_array, val)? } ScalarValue::Time32Second(val) => { - eq_array_primitive!(array, index, Time32SecondArray, val)? + eq_array_primitive!(array, index, as_time32_second_array, val)? } ScalarValue::Time32Millisecond(val) => { - eq_array_primitive!(array, index, Time32MillisecondArray, val)? + eq_array_primitive!(array, index, as_time32_millisecond_array, val)? } ScalarValue::Time64Microsecond(val) => { - eq_array_primitive!(array, index, Time64MicrosecondArray, val)? + eq_array_primitive!(array, index, as_time64_microsecond_array, val)? } ScalarValue::Time64Nanosecond(val) => { - eq_array_primitive!(array, index, Time64NanosecondArray, val)? + eq_array_primitive!(array, index, as_time64_nanosecond_array, val)? } ScalarValue::TimestampSecond(val, _) => { - eq_array_primitive!(array, index, TimestampSecondArray, val)? + eq_array_primitive!(array, index, as_timestamp_second_array, val)? } ScalarValue::TimestampMillisecond(val, _) => { - eq_array_primitive!(array, index, TimestampMillisecondArray, val)? + eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)? } ScalarValue::TimestampMicrosecond(val, _) => { - eq_array_primitive!(array, index, TimestampMicrosecondArray, val)? + eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)? } ScalarValue::TimestampNanosecond(val, _) => { - eq_array_primitive!(array, index, TimestampNanosecondArray, val)? + eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)? } ScalarValue::IntervalYearMonth(val) => { - eq_array_primitive!(array, index, IntervalYearMonthArray, val)? + eq_array_primitive!(array, index, as_interval_ym_array, val)? } ScalarValue::IntervalDayTime(val) => { - eq_array_primitive!(array, index, IntervalDayTimeArray, val)? + eq_array_primitive!(array, index, as_interval_dt_array, val)? 
} ScalarValue::IntervalMonthDayNano(val) => { - eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)? + eq_array_primitive!(array, index, as_interval_mdn_array, val)? } ScalarValue::DurationSecond(val) => { - eq_array_primitive!(array, index, DurationSecondArray, val)? + eq_array_primitive!(array, index, as_duration_second_array, val)? } ScalarValue::DurationMillisecond(val) => { - eq_array_primitive!(array, index, DurationMillisecondArray, val)? + eq_array_primitive!(array, index, as_duration_millisecond_array, val)? } ScalarValue::DurationMicrosecond(val) => { - eq_array_primitive!(array, index, DurationMicrosecondArray, val)? + eq_array_primitive!(array, index, as_duration_microsecond_array, val)? } ScalarValue::DurationNanosecond(val) => { - eq_array_primitive!(array, index, DurationNanosecondArray, val)? + eq_array_primitive!(array, index, as_duration_nanosecond_array, val)? } ScalarValue::Union(value, _, _) => { - let array = as_union_array(array); + let array = as_union_array(array)?; let ti = array.type_id(index); let index = array.value_offset(index); if let Some((ti_v, value)) = value { @@ -3538,6 +3767,201 @@ impl ScalarValue { self.compact(); self } + + /// Returns the minimum value for the given numeric `DataType`. + /// + /// This function returns the smallest representable value for numeric + /// and temporal data types. For non-numeric types, it returns `None`. + /// + /// # Supported Types + /// + /// - **Integer types**: `i8::MIN`, `i16::MIN`, etc. + /// - **Unsigned types**: Always 0 (`u8::MIN`, `u16::MIN`, etc.) 
+ /// - **Float types**: Negative infinity (IEEE 754) + /// - **Decimal types**: Smallest value based on precision + /// - **Temporal types**: Minimum timestamp/date values + /// - **Time types**: 0 (midnight) + /// - **Duration types**: `i64::MIN` + pub fn min(datatype: &DataType) -> Option { + match datatype { + DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))), + DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))), + DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))), + DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))), + DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))), + DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))), + DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))), + DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))), + DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))), + DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))), + DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))), + DataType::Decimal128(precision, scale) => { + // For decimal, min is -10^(precision-scale) + 10^(-scale) + // But for simplicity, we use the minimum i128 value that fits the precision + let max_digits = 10_i128.pow(*precision as u32) - 1; + Some(ScalarValue::Decimal128( + Some(-max_digits), + *precision, + *scale, + )) + } + DataType::Decimal256(precision, scale) => { + // Similar to Decimal128 but with i256 + // For now, use a large negative value + let max_digits = i256::from_i128(10_i128) + .checked_pow(*precision as u32) + .and_then(|v| v.checked_sub(i256::from_i128(1))) + .unwrap_or(i256::MAX); + Some(ScalarValue::Decimal256( + Some(max_digits.neg_wrapping()), + *precision, + *scale, + )) + } + DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))), + DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))), + DataType::Time32(TimeUnit::Second) => { + Some(ScalarValue::Time32Second(Some(0))) + } + 
DataType::Time32(TimeUnit::Millisecond) => { + Some(ScalarValue::Time32Millisecond(Some(0))) + } + DataType::Time64(TimeUnit::Microsecond) => { + Some(ScalarValue::Time64Microsecond(Some(0))) + } + DataType::Time64(TimeUnit::Nanosecond) => { + Some(ScalarValue::Time64Nanosecond(Some(0))) + } + DataType::Timestamp(unit, tz) => match unit { + TimeUnit::Second => { + Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone())) + } + TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond( + Some(i64::MIN), + tz.clone(), + )), + TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond( + Some(i64::MIN), + tz.clone(), + )), + TimeUnit::Nanosecond => { + Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone())) + } + }, + DataType::Duration(unit) => match unit { + TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))), + TimeUnit::Millisecond => { + Some(ScalarValue::DurationMillisecond(Some(i64::MIN))) + } + TimeUnit::Microsecond => { + Some(ScalarValue::DurationMicrosecond(Some(i64::MIN))) + } + TimeUnit::Nanosecond => { + Some(ScalarValue::DurationNanosecond(Some(i64::MIN))) + } + }, + _ => None, + } + } + + /// Returns the maximum value for the given numeric `DataType`. + /// + /// This function returns the largest representable value for numeric + /// and temporal data types. For non-numeric types, it returns `None`. + /// + /// # Supported Types + /// + /// - **Integer types**: `i8::MAX`, `i16::MAX`, etc. + /// - **Unsigned types**: `u8::MAX`, `u16::MAX`, etc. 
+ /// - **Float types**: Positive infinity (IEEE 754) + /// - **Decimal types**: Largest value based on precision + /// - **Temporal types**: Maximum timestamp/date values + /// - **Time types**: Maximum time in the day (1 day - 1 unit) + /// - **Duration types**: `i64::MAX` + pub fn max(datatype: &DataType) -> Option { + match datatype { + DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))), + DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))), + DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))), + DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))), + DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))), + DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))), + DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))), + DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))), + DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))), + DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))), + DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))), + DataType::Decimal128(precision, scale) => { + // For decimal, max is 10^(precision-scale) - 10^(-scale) + // But for simplicity, we use the maximum i128 value that fits the precision + let max_digits = 10_i128.pow(*precision as u32) - 1; + Some(ScalarValue::Decimal128( + Some(max_digits), + *precision, + *scale, + )) + } + DataType::Decimal256(precision, scale) => { + // Similar to Decimal128 but with i256 + let max_digits = i256::from_i128(10_i128) + .checked_pow(*precision as u32) + .and_then(|v| v.checked_sub(i256::from_i128(1))) + .unwrap_or(i256::MAX); + Some(ScalarValue::Decimal256( + Some(max_digits), + *precision, + *scale, + )) + } + DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))), + DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))), + DataType::Time32(TimeUnit::Second) => { + // 86399 seconds = 23:59:59 + Some(ScalarValue::Time32Second(Some(86_399))) + } + 
DataType::Time32(TimeUnit::Millisecond) => { + // 86_399_999 milliseconds = 23:59:59.999 + Some(ScalarValue::Time32Millisecond(Some(86_399_999))) + } + DataType::Time64(TimeUnit::Microsecond) => { + // 86_399_999_999 microseconds = 23:59:59.999999 + Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))) + } + DataType::Time64(TimeUnit::Nanosecond) => { + // 86_399_999_999_999 nanoseconds = 23:59:59.999999999 + Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999))) + } + DataType::Timestamp(unit, tz) => match unit { + TimeUnit::Second => { + Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone())) + } + TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond( + Some(i64::MAX), + tz.clone(), + )), + TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond( + Some(i64::MAX), + tz.clone(), + )), + TimeUnit::Nanosecond => { + Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone())) + } + }, + DataType::Duration(unit) => match unit { + TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))), + TimeUnit::Millisecond => { + Some(ScalarValue::DurationMillisecond(Some(i64::MAX))) + } + TimeUnit::Microsecond => { + Some(ScalarValue::DurationMicrosecond(Some(i64::MAX))) + } + TimeUnit::Nanosecond => { + Some(ScalarValue::DurationNanosecond(Some(i64::MAX))) + } + }, + _ => None, + } + } } /// Compacts the data of an `ArrayData` into a new `ArrayData`. 
@@ -4164,17 +4588,19 @@ impl ScalarType for Date32Type { #[cfg(test)] mod tests { - use super::*; - use crate::cast::{ - as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array, - }; - + use crate::cast::{as_list_array, as_map_array, as_struct_array}; use crate::test_util::batches_to_string; - use arrow::array::{types::Float64Type, NullBufferBuilder}; + use arrow::array::{ + FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder, + NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch, + StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder, + }; use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; - use arrow::datatypes::Fields; + use arrow::datatypes::{ + ArrowNumericType, Fields, Float64Type, DECIMAL256_MAX_PRECISION, + }; use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_columns; use chrono::NaiveDate; @@ -4531,7 +4957,7 @@ mod tests { ]); let array = ScalarValue::iter_to_array(scalars).unwrap(); - let list_array = as_list_array(&array); + let list_array = as_list_array(&array).unwrap(); // List[[1,2,3], null, [4,5]] let expected = ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), @@ -4547,7 +4973,7 @@ mod tests { ]); let array = ScalarValue::iter_to_array(scalars).unwrap(); - let list_array = as_large_list_array(&array); + let list_array = as_large_list_array(&array).unwrap(); let expected = LargeListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, @@ -4611,6 +5037,17 @@ mod tests { } } + #[test] + fn test_eq_array_err_message() { + assert_starts_with( + ScalarValue::Utf8(Some("123".to_string())) + .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0) + .unwrap_err() + .message(), + "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray>", + ); + } + #[test] fn scalar_add_trait_test() -> Result<()> { let float_value = 
ScalarValue::Float64(Some(123.)); @@ -4899,6 +5336,116 @@ mod tests { Ok(()) } + #[test] + fn test_new_one_decimal128() { + assert_eq!( + ScalarValue::new_one(&DataType::Decimal128(5, 0)).unwrap(), + ScalarValue::Decimal128(Some(1), 5, 0) + ); + assert_eq!( + ScalarValue::new_one(&DataType::Decimal128(5, 1)).unwrap(), + ScalarValue::Decimal128(Some(10), 5, 1) + ); + assert_eq!( + ScalarValue::new_one(&DataType::Decimal128(5, 2)).unwrap(), + ScalarValue::Decimal128(Some(100), 5, 2) + ); + // More precision + assert_eq!( + ScalarValue::new_one(&DataType::Decimal128(7, 2)).unwrap(), + ScalarValue::Decimal128(Some(100), 7, 2) + ); + // No negative scale + assert!(ScalarValue::new_one(&DataType::Decimal128(5, -1)).is_err()); + // Invalid combination + assert!(ScalarValue::new_one(&DataType::Decimal128(0, 2)).is_err()); + assert!(ScalarValue::new_one(&DataType::Decimal128(5, 7)).is_err()); + } + + #[test] + fn test_new_one_decimal256() { + assert_eq!( + ScalarValue::new_one(&DataType::Decimal256(5, 0)).unwrap(), + ScalarValue::Decimal256(Some(1.into()), 5, 0) + ); + assert_eq!( + ScalarValue::new_one(&DataType::Decimal256(5, 1)).unwrap(), + ScalarValue::Decimal256(Some(10.into()), 5, 1) + ); + assert_eq!( + ScalarValue::new_one(&DataType::Decimal256(5, 2)).unwrap(), + ScalarValue::Decimal256(Some(100.into()), 5, 2) + ); + // More precision + assert_eq!( + ScalarValue::new_one(&DataType::Decimal256(7, 2)).unwrap(), + ScalarValue::Decimal256(Some(100.into()), 7, 2) + ); + // No negative scale + assert!(ScalarValue::new_one(&DataType::Decimal256(5, -1)).is_err()); + // Invalid combination + assert!(ScalarValue::new_one(&DataType::Decimal256(0, 2)).is_err()); + assert!(ScalarValue::new_one(&DataType::Decimal256(5, 7)).is_err()); + } + + #[test] + fn test_new_ten_decimal128() { + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal128(5, 1)).unwrap(), + ScalarValue::Decimal128(Some(100), 5, 1) + ); + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal128(5, 2)).unwrap(), + 
ScalarValue::Decimal128(Some(1000), 5, 2) + ); + // More precision + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal128(7, 2)).unwrap(), + ScalarValue::Decimal128(Some(1000), 7, 2) + ); + // No negative or zero scale + assert!(ScalarValue::new_ten(&DataType::Decimal128(5, 0)).is_err()); + assert!(ScalarValue::new_ten(&DataType::Decimal128(5, -1)).is_err()); + // Invalid combination + assert!(ScalarValue::new_ten(&DataType::Decimal128(0, 2)).is_err()); + assert!(ScalarValue::new_ten(&DataType::Decimal128(5, 7)).is_err()); + } + + #[test] + fn test_new_ten_decimal256() { + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal256(5, 1)).unwrap(), + ScalarValue::Decimal256(Some(100.into()), 5, 1) + ); + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal256(5, 2)).unwrap(), + ScalarValue::Decimal256(Some(1000.into()), 5, 2) + ); + // More precision + assert_eq!( + ScalarValue::new_ten(&DataType::Decimal256(7, 2)).unwrap(), + ScalarValue::Decimal256(Some(1000.into()), 7, 2) + ); + // No negative or zero scale + assert!(ScalarValue::new_ten(&DataType::Decimal256(5, 0)).is_err()); + assert!(ScalarValue::new_ten(&DataType::Decimal256(5, -1)).is_err()); + // Invalid combination + assert!(ScalarValue::new_ten(&DataType::Decimal256(0, 2)).is_err()); + assert!(ScalarValue::new_ten(&DataType::Decimal256(5, 7)).is_err()); + } + + #[test] + fn test_new_negative_one_decimal128() { + assert_eq!( + ScalarValue::new_negative_one(&DataType::Decimal128(5, 0)).unwrap(), + ScalarValue::Decimal128(Some(-1), 5, 0) + ); + assert_eq!( + ScalarValue::new_negative_one(&DataType::Decimal128(5, 2)).unwrap(), + ScalarValue::Decimal128(Some(-100), 5, 2) + ); + } + #[test] fn test_list_partial_cmp() { let a = @@ -6661,10 +7208,7 @@ mod tests { let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}"); let root_err = err.find_root(); match root_err{ - DataFusionError::ArrowError( - ArrowError::ArithmeticOverflow(_), - _, - ) => {} + 
DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {} _ => return Err(err), }; } @@ -6952,6 +7496,26 @@ mod tests { ScalarValue::Float64(Some(-9.9)), 5, ), + ( + ScalarValue::Decimal128(Some(10), 1, 0), + ScalarValue::Decimal128(Some(5), 1, 0), + 5, + ), + ( + ScalarValue::Decimal128(Some(5), 1, 0), + ScalarValue::Decimal128(Some(10), 1, 0), + 5, + ), + ( + ScalarValue::Decimal256(Some(10.into()), 1, 0), + ScalarValue::Decimal256(Some(5.into()), 1, 0), + 5, + ), + ( + ScalarValue::Decimal256(Some(5.into()), 1, 0), + ScalarValue::Decimal256(Some(10.into()), 1, 0), + 5, + ), ]; for (lhs, rhs, expected) in cases.iter() { let distance = lhs.distance(rhs).unwrap(); @@ -6959,6 +7523,24 @@ mod tests { } } + #[test] + fn test_distance_none() { + let cases = [ + ( + ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0), + ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0), + ), + ( + ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0), + ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0), + ), + ]; + for (lhs, rhs) in cases.iter() { + let distance = lhs.distance(rhs); + assert!(distance.is_none(), "{lhs} vs {rhs}"); + } + } + #[test] fn test_scalar_distance_invalid() { let cases = [ @@ -7000,7 +7582,33 @@ mod tests { (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))), ( ScalarValue::Decimal128(Some(123), 5, 5), - ScalarValue::Decimal128(Some(120), 5, 5), + ScalarValue::Decimal128(Some(120), 5, 3), + ), + ( + ScalarValue::Decimal128(Some(123), 5, 5), + ScalarValue::Decimal128(Some(120), 3, 5), + ), + ( + ScalarValue::Decimal256(Some(123.into()), 5, 5), + ScalarValue::Decimal256(Some(120.into()), 3, 5), + ), + // Distance 2 * 2^50 is larger than usize + ( + ScalarValue::Decimal256( + Some(i256::from_parts(0, 2_i64.pow(50).into())), + 1, + 0, + ), + ScalarValue::Decimal256( + Some(i256::from_parts(0, (-(2_i64).pow(50)).into())), + 1, + 0, + ), + 
), + // Distance overflow + ( + ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0), + ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0), ), ]; for (lhs, rhs) in cases { @@ -7712,4 +8320,323 @@ mod tests { "Expected '{actual}' to start with '{expected_prefix}'" ); } + + #[test] + fn test_new_default() { + // Test numeric types + assert_eq!( + ScalarValue::new_default(&DataType::Int32).unwrap(), + ScalarValue::Int32(Some(0)) + ); + assert_eq!( + ScalarValue::new_default(&DataType::Float64).unwrap(), + ScalarValue::Float64(Some(0.0)) + ); + assert_eq!( + ScalarValue::new_default(&DataType::Boolean).unwrap(), + ScalarValue::Boolean(Some(false)) + ); + + // Test string types + assert_eq!( + ScalarValue::new_default(&DataType::Utf8).unwrap(), + ScalarValue::Utf8(Some("".to_string())) + ); + assert_eq!( + ScalarValue::new_default(&DataType::LargeUtf8).unwrap(), + ScalarValue::LargeUtf8(Some("".to_string())) + ); + + // Test binary types + assert_eq!( + ScalarValue::new_default(&DataType::Binary).unwrap(), + ScalarValue::Binary(Some(vec![])) + ); + + // Test fixed size binary + assert_eq!( + ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(), + ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0])) + ); + + // Test temporal types + assert_eq!( + ScalarValue::new_default(&DataType::Date32).unwrap(), + ScalarValue::Date32(Some(0)) + ); + assert_eq!( + ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(), + ScalarValue::Time32Second(Some(0)) + ); + + // Test decimal types + assert_eq!( + ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(), + ScalarValue::Decimal128(Some(0), 10, 2) + ); + + // Test list type + let list_field = Field::new_list_field(DataType::Int32, true); + let list_result = + ScalarValue::new_default(&DataType::List(Arc::new(list_field.clone()))) + .unwrap(); + match list_result { + ScalarValue::List(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.value_length(0), 0); 
// empty list + } + _ => panic!("Expected List"), + } + + // Test struct type + let struct_fields = Fields::from(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Utf8, false), + ]); + let struct_result = + ScalarValue::new_default(&DataType::Struct(struct_fields.clone())).unwrap(); + match struct_result { + ScalarValue::Struct(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.column(0).as_primitive::().value(0), 0); + assert_eq!(arr.column(1).as_string::().value(0), ""); + } + _ => panic!("Expected Struct"), + } + + // Test union type + let union_fields = UnionFields::new( + vec![0, 1], + vec![ + Field::new("i32", DataType::Int32, false), + Field::new("f64", DataType::Float64, false), + ], + ); + let union_result = ScalarValue::new_default(&DataType::Union( + union_fields.clone(), + UnionMode::Sparse, + )) + .unwrap(); + match union_result { + ScalarValue::Union(Some((type_id, value)), _, _) => { + assert_eq!(type_id, 0); + assert_eq!(*value, ScalarValue::Int32(Some(0))); + } + _ => panic!("Expected Union"), + } + } + + #[test] + fn test_scalar_min() { + // Test integer types + assert_eq!( + ScalarValue::min(&DataType::Int8), + Some(ScalarValue::Int8(Some(i8::MIN))) + ); + assert_eq!( + ScalarValue::min(&DataType::Int32), + Some(ScalarValue::Int32(Some(i32::MIN))) + ); + assert_eq!( + ScalarValue::min(&DataType::UInt8), + Some(ScalarValue::UInt8(Some(0))) + ); + assert_eq!( + ScalarValue::min(&DataType::UInt64), + Some(ScalarValue::UInt64(Some(0))) + ); + + // Test float types + assert_eq!( + ScalarValue::min(&DataType::Float32), + Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))) + ); + assert_eq!( + ScalarValue::min(&DataType::Float64), + Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))) + ); + + // Test decimal types + let decimal_min = ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap(); + match decimal_min { + ScalarValue::Decimal128(Some(val), 5, 2) => { + assert_eq!(val, -99999); // -999.99 with scale 2 + } + _ => 
panic!("Expected Decimal128"), + } + + // Test temporal types + assert_eq!( + ScalarValue::min(&DataType::Date32), + Some(ScalarValue::Date32(Some(i32::MIN))) + ); + assert_eq!( + ScalarValue::min(&DataType::Time32(TimeUnit::Second)), + Some(ScalarValue::Time32Second(Some(0))) + ); + assert_eq!( + ScalarValue::min(&DataType::Timestamp(TimeUnit::Nanosecond, None)), + Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), None)) + ); + + // Test duration types + assert_eq!( + ScalarValue::min(&DataType::Duration(TimeUnit::Second)), + Some(ScalarValue::DurationSecond(Some(i64::MIN))) + ); + + // Test unsupported types + assert_eq!(ScalarValue::min(&DataType::Utf8), None); + assert_eq!(ScalarValue::min(&DataType::Binary), None); + assert_eq!( + ScalarValue::min(&DataType::List(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); + } + + #[test] + fn test_scalar_max() { + // Test integer types + assert_eq!( + ScalarValue::max(&DataType::Int8), + Some(ScalarValue::Int8(Some(i8::MAX))) + ); + assert_eq!( + ScalarValue::max(&DataType::Int32), + Some(ScalarValue::Int32(Some(i32::MAX))) + ); + assert_eq!( + ScalarValue::max(&DataType::UInt8), + Some(ScalarValue::UInt8(Some(u8::MAX))) + ); + assert_eq!( + ScalarValue::max(&DataType::UInt64), + Some(ScalarValue::UInt64(Some(u64::MAX))) + ); + + // Test float types + assert_eq!( + ScalarValue::max(&DataType::Float32), + Some(ScalarValue::Float32(Some(f32::INFINITY))) + ); + assert_eq!( + ScalarValue::max(&DataType::Float64), + Some(ScalarValue::Float64(Some(f64::INFINITY))) + ); + + // Test decimal types + let decimal_max = ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap(); + match decimal_max { + ScalarValue::Decimal128(Some(val), 5, 2) => { + assert_eq!(val, 99999); // 999.99 with scale 2 + } + _ => panic!("Expected Decimal128"), + } + + // Test temporal types + assert_eq!( + ScalarValue::max(&DataType::Date32), + Some(ScalarValue::Date32(Some(i32::MAX))) + ); + assert_eq!( + 
ScalarValue::max(&DataType::Time32(TimeUnit::Second)), + Some(ScalarValue::Time32Second(Some(86_399))) // 23:59:59 + ); + assert_eq!( + ScalarValue::max(&DataType::Time64(TimeUnit::Microsecond)), + Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))) // 23:59:59.999999 + ); + assert_eq!( + ScalarValue::max(&DataType::Timestamp(TimeUnit::Nanosecond, None)), + Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), None)) + ); + + // Test duration types + assert_eq!( + ScalarValue::max(&DataType::Duration(TimeUnit::Millisecond)), + Some(ScalarValue::DurationMillisecond(Some(i64::MAX))) + ); + + // Test unsupported types + assert_eq!(ScalarValue::max(&DataType::Utf8), None); + assert_eq!(ScalarValue::max(&DataType::Binary), None); + assert_eq!( + ScalarValue::max(&DataType::Struct(Fields::from(vec![Field::new( + "field", + DataType::Int32, + true + )]))), + None + ); + } + + #[test] + fn test_min_max_float16() { + // Test Float16 min and max + let min_f16 = ScalarValue::min(&DataType::Float16).unwrap(); + match min_f16 { + ScalarValue::Float16(Some(val)) => { + assert_eq!(val, f16::NEG_INFINITY); + } + _ => panic!("Expected Float16"), + } + + let max_f16 = ScalarValue::max(&DataType::Float16).unwrap(); + match max_f16 { + ScalarValue::Float16(Some(val)) => { + assert_eq!(val, f16::INFINITY); + } + _ => panic!("Expected Float16"), + } + } + + #[test] + fn test_new_default_interval() { + // Test all interval types + assert_eq!( + ScalarValue::new_default(&DataType::Interval(IntervalUnit::YearMonth)) + .unwrap(), + ScalarValue::IntervalYearMonth(Some(0)) + ); + assert_eq!( + ScalarValue::new_default(&DataType::Interval(IntervalUnit::DayTime)).unwrap(), + ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO)) + ); + assert_eq!( + ScalarValue::new_default(&DataType::Interval(IntervalUnit::MonthDayNano)) + .unwrap(), + ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO)) + ); + } + + #[test] + fn test_min_max_with_timezone() { + let tz = 
Some(Arc::from("UTC")); + + // Test timestamp with timezone + let min_ts = + ScalarValue::min(&DataType::Timestamp(TimeUnit::Second, tz.clone())).unwrap(); + match min_ts { + ScalarValue::TimestampSecond(Some(val), Some(tz_str)) => { + assert_eq!(val, i64::MIN); + assert_eq!(tz_str.as_ref(), "UTC"); + } + _ => panic!("Expected TimestampSecond with timezone"), + } + + let max_ts = + ScalarValue::max(&DataType::Timestamp(TimeUnit::Millisecond, tz.clone())) + .unwrap(); + match max_ts { + ScalarValue::TimestampMillisecond(Some(val), Some(tz_str)) => { + assert_eq!(val, i64::MAX); + assert_eq!(tz_str.as_ref(), "UTC"); + } + _ => panic!("Expected TimestampMillisecond with timezone"), + } + } } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 9747f44240604..3a0259ec64bbf 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -47,6 +47,7 @@ compression = [ "bzip2", "flate2", "zstd", + "arrow-ipc/zstd", "datafusion-datasource/compression", ] crypto_expressions = ["datafusion-functions/crypto_expressions"] @@ -61,6 +62,7 @@ default = [ "unicode_expressions", "compression", "parquet", + "parquet_encryption", "recursive_protection", ] encoding_expressions = ["datafusion-functions/encoding_expressions"] @@ -68,6 +70,13 @@ encoding_expressions = ["datafusion-functions/encoding_expressions"] force_hash_collisions = ["datafusion-physical-plan/force_hash_collisions", "datafusion-common/force_hash_collisions"] math_expressions = ["datafusion-functions/math_expressions"] parquet = ["datafusion-common/parquet", "dep:parquet", "datafusion-datasource-parquet"] +parquet_encryption = [ + "dep:parquet", + "parquet/encryption", + "datafusion-common/parquet_encryption", + "datafusion-datasource-parquet/parquet_encryption", + "dep:hex", +] pyarrow = ["datafusion-common/pyarrow", "parquet"] regex_expressions = [ "datafusion-functions/regex_expressions", @@ -127,6 +136,7 @@ datafusion-session = { workspace = true } datafusion-sql = { workspace = 
true } flate2 = { version = "1.1.2", optional = true } futures = { workspace = true } +hex = { workspace = true, optional = true } itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } @@ -154,6 +164,7 @@ datafusion-macros = { workspace = true } datafusion-physical-optimizer = { workspace = true } doc-comment = { workspace = true } env_logger = { workspace = true } +glob = { version = "0.3.0" } insta = { workspace = true } paste = "^1.0" rand = { workspace = true, features = ["small_rng"] } @@ -161,7 +172,7 @@ rand_distr = "0.5" regex = { workspace = true } rstest = { workspace = true } serde_json = { workspace = true } -sysinfo = "0.35.2" +sysinfo = "0.36.1" test-utils = { path = "../../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot", "fs"] } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index c9a05fd650fcf..a19e6f5581621 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1710,6 +1710,40 @@ impl DataFrame { }) } + /// Calculate the distinct intersection of two [`DataFrame`]s. 
The two [`DataFrame`]s must have exactly the same schema + /// + /// ``` + /// # use datafusion::prelude::*; + /// # use datafusion::error::Result; + /// # use datafusion_common::assert_batches_sorted_eq; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// let ctx = SessionContext::new(); + /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = df.intersect_distinct(d2)?; + /// let expected = vec![ + /// "+---+---+---+", + /// "| a | b | c |", + /// "+---+---+---+", + /// "| 1 | 2 | 3 |", + /// "+---+---+---+" + /// ]; + /// # assert_batches_sorted_eq!(expected, &df.collect().await?); + /// # Ok(()) + /// # } + /// ``` + pub fn intersect_distinct(self, dataframe: DataFrame) -> Result { + let left_plan = self.plan; + let right_plan = dataframe.plan; + let plan = LogicalPlanBuilder::intersect(left_plan, right_plan, false)?; + Ok(DataFrame { + session_state: self.session_state, + plan, + projection_requires_validation: true, + }) + } + /// Calculate the exception of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema /// /// ``` @@ -1746,6 +1780,42 @@ impl DataFrame { }) } + /// Calculate the distinct exception of two [`DataFrame`]s. 
The two [`DataFrame`]s must have exactly the same schema + /// + /// ``` + /// # use datafusion::prelude::*; + /// # use datafusion::error::Result; + /// # use datafusion_common::assert_batches_sorted_eq; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// let ctx = SessionContext::new(); + /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let result = df.except_distinct(d2)?; + /// // those columns are not in example.csv, but in example_long.csv + /// let expected = vec![ + /// "+---+---+---+", + /// "| a | b | c |", + /// "+---+---+---+", + /// "| 4 | 5 | 6 |", + /// "| 7 | 8 | 9 |", + /// "+---+---+---+" + /// ]; + /// # assert_batches_sorted_eq!(expected, &result.collect().await?); + /// # Ok(()) + /// # } + /// ``` + pub fn except_distinct(self, dataframe: DataFrame) -> Result { + let left_plan = self.plan; + let right_plan = dataframe.plan; + let plan = LogicalPlanBuilder::except(left_plan, right_plan, false)?; + Ok(DataFrame { + session_state: self.session_state, + plan, + projection_requires_validation: true, + }) + } + /// Execute this `DataFrame` and write the results to `table_name`. /// /// Returns a single [RecordBatch] containing a single column and @@ -2036,10 +2106,11 @@ impl DataFrame { match self.plan.schema().qualified_field_from_column(&old_column) { Ok(qualifier_and_field) => qualifier_and_field, // no-op if field not found - Err(DataFusionError::SchemaError( - SchemaError::FieldNotFound { .. }, - _, - )) => return Ok(self), + Err(DataFusionError::SchemaError(e, _)) + if matches!(*e, SchemaError::FieldNotFound { .. 
}) => + { + return Ok(self); + } Err(err) => return Err(err), }; let projection = self diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index a2bec74ee1408..83bb60184fb92 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -247,6 +247,7 @@ mod tests { Ok(()) } + #[cfg(feature = "parquet_encryption")] #[tokio::test] async fn roundtrip_parquet_with_encryption() -> Result<()> { use parquet::encryption::decrypt::FileDecryptionProperties; diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index b620ff62d9a65..19e884601c543 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -134,6 +134,10 @@ impl FileFormat for ArrowFormat { } } + fn compression_type(&self) -> Option { + None + } + async fn infer_schema( &self, _state: &dyn Session, @@ -294,7 +298,7 @@ impl FileSink for ArrowFileSink { demux_task .join_unwind() .await - .map_err(DataFusionError::ExecutionJoin)??; + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; Ok(row_count as u64) } } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 9022e340cd36f..23ba9e6ec8736 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -56,6 +56,7 @@ mod tests { use async_trait::async_trait; use bytes::Bytes; use chrono::DateTime; + use datafusion_common::parsers::CompressionTypeVariant; use futures::stream::BoxStream; use futures::StreamExt; use insta::assert_snapshot; @@ -63,7 +64,7 @@ mod tests { use object_store::path::Path; use object_store::{ Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, + ObjectMeta, ObjectStore, 
PutMultipartOptions, PutOptions, PutPayload, PutResult, }; use regex::Regex; use rstest::*; @@ -97,7 +98,7 @@ mod tests { async fn put_multipart_opts( &self, _location: &Path, - _opts: PutMultipartOpts, + _opts: PutMultipartOptions, ) -> object_store::Result> { unimplemented!() } @@ -796,83 +797,107 @@ mod tests { Ok(()) } + /// Read a single empty csv file with header + /// + /// empty.csv: + /// c1,c2,c3 #[tokio::test] - async fn test_csv_write_empty_file() -> Result<()> { - // Case 1. write to a single file - // Expect: an empty file created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}/empty.csv", tmp_dir.path().to_string_lossy()); - + async fn test_csv_empty_with_header() -> Result<()> { let ctx = SessionContext::new(); + ctx.register_csv( + "empty", + "tests/data/empty.csv", + CsvReadOptions::new().has_header(true), + ) + .await?; - let df = ctx.sql("SELECT 1 limit 0").await?; - - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = CsvOptions::default().with_has_header(true); - - df.write_csv(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); - - // Case 2. 
write to a directory without partition columns - // Expect: under the directory, an empty file is created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}", tmp_dir.path().to_string_lossy()); + let query = "select * from empty where random() > 0.5;"; + let query_result = ctx.sql(query).await?.collect().await?; - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = CsvOptions::default().with_has_header(true); + assert_snapshot!(batches_to_string(&query_result),@r###" + ++ + ++ + "###); - let df = ctx.sql("SELECT 1 limit 0").await?; + Ok(()) + } - df.write_csv(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); + #[tokio::test] + async fn test_csv_extension_compressed() -> Result<()> { + // Write compressed CSV files + // Expect: under the directory, a file is created with ".csv.gz" extension + let ctx = SessionContext::new(); - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 1); + let df = ctx + .read_csv( + &format!("{}/csv/aggregate_test_100.csv", arrow_test_data()), + CsvReadOptions::default().has_header(true), + ) + .await?; - // Case 3. 
write to a directory with partition columns - // Expect: No file is created let tmp_dir = tempfile::TempDir::new().unwrap(); let path = format!("{}", tmp_dir.path().to_string_lossy()); - let df = ctx.sql("SELECT 1 as col1, 2 as col2 limit 0").await?; - - let cfg1 = crate::dataframe::DataFrameWriteOptions::new() - .with_single_file_output(true) - .with_partition_by(vec!["col1".to_string()]); - let cfg2 = CsvOptions::default().with_has_header(true); + let cfg1 = crate::dataframe::DataFrameWriteOptions::new(); + let cfg2 = CsvOptions::default() + .with_has_header(true) + .with_compression(CompressionTypeVariant::GZIP); df.write_csv(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 0); + + let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); + assert_eq!(files.len(), 1); + assert!(files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv.gz")); Ok(()) } - /// Read a single empty csv file with header - /// - /// empty.csv: - /// c1,c2,c3 #[tokio::test] - async fn test_csv_empty_with_header() -> Result<()> { + async fn test_csv_extension_uncompressed() -> Result<()> { + // Write plain uncompressed CSV files + // Expect: under the directory, a file is created with ".csv" extension let ctx = SessionContext::new(); - ctx.register_csv( - "empty", - "tests/data/empty.csv", - CsvReadOptions::new().has_header(true), - ) - .await?; - let query = "select * from empty where random() > 0.5;"; - let query_result = ctx.sql(query).await?.collect().await?; + let df = ctx + .read_csv( + &format!("{}/csv/aggregate_test_100.csv", arrow_test_data()), + CsvReadOptions::default().has_header(true), + ) + .await?; - assert_snapshot!(batches_to_string(&query_result),@r###" - ++ - ++ - "###); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let path = format!("{}", tmp_dir.path().to_string_lossy()); + 
+ let cfg1 = crate::dataframe::DataFrameWriteOptions::new(); + let cfg2 = CsvOptions::default().with_has_header(true); + + df.write_csv(&path, cfg1, Some(cfg2)).await?; + assert!(std::path::Path::new(&path).exists()); + + let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); + assert_eq!(files.len(), 1); + assert!(files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv")); Ok(()) } diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index d818187bb3073..34d3d64f07fb2 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -31,7 +31,6 @@ mod tests { use arrow_schema::Schema; use bytes::Bytes; use datafusion_catalog::Session; - use datafusion_common::config::JsonOptions; use datafusion_common::test_util::batches_to_string; use datafusion_datasource::decoder::{ BatchDeserializer, DecoderDeserializer, DeserializerOutput, @@ -258,61 +257,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_json_write_empty_file() -> Result<()> { - // Case 1. write to a single file - // Expect: an empty file created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}/empty.json", tmp_dir.path().to_string_lossy()); - - let ctx = SessionContext::new(); - - let df = ctx.sql("SELECT 1 limit 0").await?; - - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = JsonOptions::default(); - - df.write_json(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); - - // Case 2. 
write to a directory without partition columns - // Expect: under the directory, an empty file is created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}", tmp_dir.path().to_string_lossy()); - - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = JsonOptions::default(); - - let df = ctx.sql("SELECT 1 limit 0").await?; - - df.write_json(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); - - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 1); - - // Case 3. write to a directory with partition columns - // Expect: No file is created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}", tmp_dir.path().to_string_lossy()); - - let df = ctx.sql("SELECT 1 as col1, 2 as col2 limit 0").await?; - - let cfg1 = crate::dataframe::DataFrameWriteOptions::new() - .with_single_file_output(true) - .with_partition_by(vec!["col1".to_string()]); - let cfg2 = JsonOptions::default(); - - df.write_json(&path, cfg1, Some(cfg2)).await?; - - assert!(std::path::Path::new(&path).exists()); - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 0); - Ok(()) - } - #[test] fn test_json_deserializer_finish() -> Result<()> { let schema = Arc::new(Schema::new(vec![ diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 8a2db3431fa0b..9b343923f0145 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -158,7 +158,7 @@ mod tests { use object_store::ObjectMeta; use object_store::{ path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectStore, - PutMultipartOpts, PutOptions, PutPayload, PutResult, + PutMultipartOptions, PutOptions, PutPayload, PutResult, }; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; @@ 
-311,7 +311,7 @@ mod tests { async fn put_multipart_opts( &self, _location: &Path, - _opts: PutMultipartOpts, + _opts: PutMultipartOptions, ) -> object_store::Result> { Err(object_store::Error::NotImplemented) } @@ -1263,57 +1263,30 @@ mod tests { } #[tokio::test] - async fn test_parquet_write_empty_file() -> Result<()> { - // Case 1. write to a single file - // Expect: an empty file created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}/empty.parquet", tmp_dir.path().to_string_lossy()); - - let ctx = SessionContext::new(); - - let df = ctx.sql("SELECT 1 limit 0").await?; - - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = TableParquetOptions::default(); - - df.write_parquet(&path, cfg1, Some(cfg2)).await?; - assert!(std::path::Path::new(&path).exists()); + async fn test_write_empty_recordbatch_creates_file() -> Result<()> { + let empty_record_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])), + vec![Arc::new(Int32Array::from(Vec::::new()))], + ) + .expect("Failed to create empty RecordBatch"); - // Case 2. 
write to a directory without partition columns - // Expect: under the directory, an empty file is created let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}", tmp_dir.path().to_string_lossy()); - - let cfg1 = - crate::dataframe::DataFrameWriteOptions::new().with_single_file_output(true); - let cfg2 = TableParquetOptions::default(); + let path = format!("{}/empty2.parquet", tmp_dir.path().to_string_lossy()); - let df = ctx.sql("SELECT 1 limit 0").await?; - - df.write_parquet(&path, cfg1, Some(cfg2)).await?; + let ctx = SessionContext::new(); + let df = ctx.read_batch(empty_record_batch.clone())?; + df.write_parquet(&path, crate::dataframe::DataFrameWriteOptions::new(), None) + .await?; assert!(std::path::Path::new(&path).exists()); - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 1); - - // Case 3. write to a directory with partition columns - // Expect: No file is created - let tmp_dir = tempfile::TempDir::new().unwrap(); - let path = format!("{}", tmp_dir.path().to_string_lossy()); - - let df = ctx.sql("SELECT 1 as col1, 2 as col2 limit 0").await?; - - let cfg1 = crate::dataframe::DataFrameWriteOptions::new() - .with_single_file_output(true) - .with_partition_by(vec!["col1".to_string()]); - let cfg2 = TableParquetOptions::default(); - - df.write_parquet(&path, cfg1, Some(cfg2)).await?; - - assert!(std::path::Path::new(&path).exists()); - let files = std::fs::read_dir(&path).unwrap(); - assert!(files.count() == 0); + let stream = ctx + .read_parquet(&path, ParquetReadOptions::new()) + .await? 
+ .execute_stream() + .await?; + assert_eq!(stream.schema(), empty_record_batch.schema()); + let results = stream.collect::>().await; + assert_eq!(results.len(), 0); Ok(()) } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3ddf1c85e241b..121ab46730b5b 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -36,9 +36,10 @@ use datafusion_common::{ }; use datafusion_datasource::{ compute_all_files_statistics, + file::FileSource, file_groups::FileGroup, file_scan_config::{FileScanConfig, FileScanConfigBuilder}, - schema_adapter::DefaultSchemaAdapterFactory, + schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory}, }; use datafusion_execution::{ cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, @@ -47,6 +48,7 @@ use datafusion_execution::{ use datafusion_expr::{ dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, }; +use datafusion_physical_expr::schema_rewriter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; use futures::{future, stream, Stream, StreamExt, TryStreamExt}; @@ -55,10 +57,11 @@ use object_store::ObjectStore; use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; /// Indicates the source of the schema for a [`ListingTable`] // PartialEq required for assert_eq! 
in tests -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Default)] pub enum SchemaSource { /// Schema is not yet set (initial state) - None, + #[default] + Unset, /// Schema was inferred from first table_path Inferred, /// Schema was specified explicitly via with_schema @@ -67,8 +70,20 @@ pub enum SchemaSource { /// Configuration for creating a [`ListingTable`] /// +/// # Schema Evolution Support /// -#[derive(Debug, Clone)] +/// This configuration supports schema evolution through the optional +/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: +/// +/// - **Type coercion requirements**: When you need custom logic for converting between +/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) +/// - **Column mapping**: You need to map columns with a legacy name to a new name +/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. +/// +/// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles +/// basic schema compatibility cases. +/// +#[derive(Debug, Clone, Default)] pub struct ListingTableConfig { /// Paths on the `ObjectStore` for creating `ListingTable`. /// They should share the same schema and object store. 
@@ -83,17 +98,18 @@ pub struct ListingTableConfig { pub options: Option, /// Tracks the source of the schema information schema_source: SchemaSource, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, } impl ListingTableConfig { /// Creates new [`ListingTableConfig`] for reading the specified URL pub fn new(table_path: ListingTableUrl) -> Self { - let table_paths = vec![table_path]; Self { - table_paths, - file_schema: None, - options: None, - schema_source: SchemaSource::None, + table_paths: vec![table_path], + ..Default::default() } } @@ -103,9 +119,7 @@ impl ListingTableConfig { pub fn new_with_multi_paths(table_paths: Vec) -> Self { Self { table_paths, - file_schema: None, - options: None, - schema_source: SchemaSource::None, + ..Default::default() } } @@ -123,12 +137,38 @@ impl ListingTableConfig { /// /// If the schema is provided, it must contain only the fields in the file /// without the table partitioning columns. 
+ /// + /// # Example: Specifying Table Schema + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// let schema = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int64, false), + /// Field::new("name", DataType::Utf8, true), + /// ])); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) // Set options first + /// .with_schema(schema); // Then set schema + /// ``` pub fn with_schema(self, schema: SchemaRef) -> Self { + // Note: We preserve existing options state, but downstream code may expect + // options to be set. Consider calling with_listing_options() or infer_options() + // before operations that require options to be present. + debug_assert!( + self.options.is_some() || cfg!(test), + "ListingTableConfig::with_schema called without options set. \ + Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." + ); + Self { - table_paths: self.table_paths, file_schema: Some(schema), - options: self.options, schema_source: SchemaSource::Specified, + ..self } } @@ -136,12 +176,33 @@ impl ListingTableConfig { /// /// If not provided, format and other options are inferred via /// [`Self::infer_options`]. 
+ /// + /// # Example: Configuring Parquet Files with Custom Options + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension(".parquet") + /// .with_collect_stat(true); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(options); // Configure file format and options + /// ``` pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { + // Note: This method properly sets options, but be aware that downstream + // methods like infer_schema() and try_new() require both schema and options + // to be set to function correctly. + debug_assert!( + !self.table_paths.is_empty() || cfg!(test), + "ListingTableConfig::with_listing_options called without table_paths set. \ + Consider calling new() or new_with_multi_paths() first to establish table paths." 
+ ); + Self { - table_paths: self.table_paths, - file_schema: self.file_schema, options: Some(listing_options), - schema_source: self.schema_source, + ..self } } @@ -222,6 +283,8 @@ impl ListingTableConfig { file_schema: self.file_schema, options: Some(listing_options), schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, }) } @@ -240,6 +303,8 @@ impl ListingTableConfig { file_schema, options: _, schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, } = self; let (schema, new_schema_source) = match file_schema { @@ -261,6 +326,8 @@ impl ListingTableConfig { file_schema: Some(schema), options: Some(options), schema_source: new_schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, }) } None => internal_err!("No `ListingOptions` set for inferring schema"), @@ -302,11 +369,79 @@ impl ListingTableConfig { file_schema: self.file_schema, options: Some(options), schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, }) } None => config_err!("No `ListingOptions` set for inferring schema"), } } + + /// Set the [`SchemaAdapterFactory`] for the [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// If not provided, a default schema adapter factory will be used. 
+ /// + /// # Example: Custom Schema Adapter for Type Coercion + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # + /// # #[derive(Debug)] + /// # struct MySchemaAdapterFactory; + /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { + /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { + /// # unimplemented!() + /// # } + /// # } + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) + /// .with_schema(table_schema) + /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); + /// ``` + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this configuration + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Set the [`PhysicalExprAdapterFactory`] for the [`ListingTable`] + /// + /// The expression adapter factory is used to create physical expression adapters that can + /// handle schema evolution and type conversions when evaluating expressions + /// with different schemas than the table schema. 
+ /// + /// If not provided, a default physical expression adapter factory will be used unless a custom + /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. + /// + /// See for details on this transition. + pub fn with_expr_adapter_factory( + self, + expr_adapter_factory: Arc, + ) -> Self { + Self { + expr_adapter_factory: Some(expr_adapter_factory), + ..self + } + } } /// Options for creating a [`ListingTable`] @@ -801,6 +936,10 @@ pub struct ListingTable { collected_statistics: FileStatisticsCache, constraints: Constraints, column_defaults: HashMap, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, } impl ListingTable { @@ -841,6 +980,8 @@ impl ListingTable { collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), constraints: Constraints::default(), column_defaults: HashMap::new(), + schema_adapter_factory: config.schema_adapter_factory, + expr_adapter_factory: config.expr_adapter_factory, }; Ok(table) @@ -894,6 +1035,70 @@ impl ListingTable { self.schema_source } + /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. 
+ /// + /// # Example: Adding Schema Evolution Support + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); + /// # let table = ListingTable::try_new(config).unwrap(); + /// let table_with_evolution = table + /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); + /// ``` + /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this table + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Creates a schema adapter for mapping between file and table schemas + /// + /// Uses the configured schema adapter factory if available, otherwise falls back + /// to the default implementation. 
+ fn create_schema_adapter(&self) -> Box { + let table_schema = self.schema(); + match &self.schema_adapter_factory { + Some(factory) => { + factory.create_with_projected_schema(Arc::clone(&table_schema)) + } + None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), + } + } + + /// Creates a file source and applies schema adapter factory if available + fn create_file_source_with_schema_adapter(&self) -> Result> { + let mut source = self.options.format.file_source(); + // Apply schema adapter to source if available + // + // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. + // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. + if let Some(factory) = &self.schema_adapter_factory { + source = source.with_schema_adapter_factory(Arc::clone(factory))?; + } + Ok(source) + } + /// If file_sort_order is specified, creates the appropriate physical expressions fn try_create_output_ordering(&self) -> Result> { create_ordering(&self.table_schema, &self.options.file_sort_order) @@ -1002,6 +1207,8 @@ impl TableProvider for ListingTable { return Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty())))); }; + let file_source = self.create_file_source_with_schema_adapter()?; + // create the execution plan self.options .format @@ -1010,7 +1217,7 @@ impl TableProvider for ListingTable { FileScanConfigBuilder::new( object_store_url, Arc::clone(&self.file_schema), - self.options.format.file_source(), + file_source, ) .with_file_groups(partitioned_file_lists) .with_constraints(self.constraints.clone()) @@ -1019,6 +1226,7 @@ impl TableProvider for ListingTable { .with_limit(limit) .with_output_ordering(output_ordering) .with_table_partition_cols(table_partition_cols) + .with_expr_adapter(self.expr_adapter_factory.clone()) .build(), ) .await @@ -1169,8 +1377,10 @@ impl ListingTable { self.options.collect_stat, inexact_stats, )?; - let (schema_mapper, _) = 
DefaultSchemaAdapterFactory::from_schema(self.schema()) - .map_schema(self.file_schema.as_ref())?; + + let schema_adapter = self.create_schema_adapter(); + let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; + stats.column_statistics = schema_mapper.map_column_statistics(&stats.column_statistics)?; file_groups.iter_mut().try_for_each(|file_group| { @@ -1320,15 +1530,21 @@ mod tests { assert_contains, stats::Precision, test_util::{batches_to_string, datafusion_test_data}, - ScalarValue, + ColumnStatistics, ScalarValue, + }; + use datafusion_datasource::schema_adapter::{ + SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_plan::{collect, ExecutionPlanProperties}; + use rstest::rstest; use std::io::Write; use tempfile::TempDir; use url::Url; + const DUMMY_NULL_COUNT: Precision = Precision::Exact(42); + /// Creates a test schema with standard field types used in tests fn create_test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ @@ -1364,7 +1580,7 @@ mod tests { // Test default schema source let config = ListingTableConfig::new(table_path.clone()); - assert_eq!(config.schema_source(), SchemaSource::None); + assert_eq!(config.schema_source(), SchemaSource::Unset); // Test schema source after setting a schema explicitly let provided_schema = create_test_schema(); @@ -1375,7 +1591,7 @@ mod tests { let format = CsvFormat::default(); let options = ListingOptions::new(Arc::new(format)); let config_with_options = config.with_listing_options(options.clone()); - assert_eq!(config_with_options.schema_source(), SchemaSource::None); + assert_eq!(config_with_options.schema_source(), SchemaSource::Unset); let config_with_inferred = config_with_options.infer_schema(&ctx.state()).await?; assert_eq!(config_with_inferred.schema_source(), SchemaSource::Inferred); @@ -1810,7 +2026,6 @@ mod tests { #[tokio::test] async 
fn test_insert_into_append_new_parquet_files_session_overrides() -> Result<()> { let mut config_map: HashMap = HashMap::new(); - config_map.insert("datafusion.execution.batch_size".into(), "10".into()); config_map.insert( "datafusion.execution.soft_max_rows_per_output_file".into(), "10".into(), @@ -1875,7 +2090,7 @@ mod tests { "datafusion.execution.parquet.write_batch_size".into(), "5".into(), ); - config_map.insert("datafusion.execution.batch_size".into(), "1".into()); + config_map.insert("datafusion.execution.batch_size".into(), "10".into()); helper_test_append_new_files_to_table( ParquetFormat::default().get_ext(), FileCompressionType::UNCOMPRESSED, @@ -2553,4 +2768,262 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_statistics_mapping_with_custom_factory() -> Result<()> { + let ctx = SessionContext::new(); + let table = create_test_listing_table_with_json_and_adapter( + &ctx, + false, + // NullStatsAdapterFactory sets column_statistics null_count to DUMMY_NULL_COUNT + Arc::new(NullStatsAdapterFactory {}), + )?; + + let (groups, stats) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + + assert_eq!(stats.column_statistics[0].null_count, DUMMY_NULL_COUNT); + for g in groups { + if let Some(s) = g.file_statistics(None) { + assert_eq!(s.column_statistics[0].null_count, DUMMY_NULL_COUNT); + } + } + + Ok(()) + } + + #[tokio::test] + async fn test_statistics_mapping_with_default_factory() -> Result<()> { + let ctx = SessionContext::new(); + + // Create a table without providing a custom schema adapter factory + // This should fall back to using DefaultSchemaAdapterFactory + let path = "table/file.json"; + register_test_store(&ctx, &[(path, 10)]); + + let format = JsonFormat::default(); + let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(false); + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + + let config = 
ListingTableConfig::new(table_path) + .with_listing_options(opt) + .with_schema(Arc::new(schema)); + // Note: NOT calling .with_schema_adapter_factory() to test default behavior + + let table = ListingTable::try_new(config)?; + + // Verify that no custom schema adapter factory is set + assert!(table.schema_adapter_factory().is_none()); + + // The scan should work correctly with the default schema adapter + let scan_result = table.scan(&ctx.state(), None, &[], None).await; + assert!( + scan_result.is_ok(), + "Scan should succeed with default schema adapter" + ); + + // Verify that the default adapter handles basic schema compatibility + let (groups, _stats) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + assert!( + !groups.is_empty(), + "Should list files successfully with default adapter" + ); + + Ok(()) + } + + #[rstest] + #[case(MapSchemaError::TypeIncompatible, "Cannot map incompatible types")] + #[case(MapSchemaError::GeneralFailure, "Schema adapter mapping failed")] + #[case( + MapSchemaError::InvalidProjection, + "Invalid projection in schema mapping" + )] + #[tokio::test] + async fn test_schema_adapter_map_schema_errors( + #[case] error_type: MapSchemaError, + #[case] expected_error_msg: &str, + ) -> Result<()> { + let ctx = SessionContext::new(); + let table = create_test_listing_table_with_json_and_adapter( + &ctx, + false, + Arc::new(FailingMapSchemaAdapterFactory { error_type }), + )?; + + // The error should bubble up from the scan operation when schema mapping fails + let scan_result = table.scan(&ctx.state(), None, &[], None).await; + + assert!(scan_result.is_err()); + let error_msg = scan_result.unwrap_err().to_string(); + assert!( + error_msg.contains(expected_error_msg), + "Expected error containing '{expected_error_msg}', got: {error_msg}" + ); + + Ok(()) + } + + // Test that errors during file listing also bubble up correctly + #[tokio::test] + async fn test_schema_adapter_error_during_file_listing() -> Result<()> { + let ctx = 
SessionContext::new(); + let table = create_test_listing_table_with_json_and_adapter( + &ctx, + true, + Arc::new(FailingMapSchemaAdapterFactory { + error_type: MapSchemaError::TypeIncompatible, + }), + )?; + + // The error should bubble up from list_files_for_scan when collecting statistics + let list_result = table.list_files_for_scan(&ctx.state(), &[], None).await; + + assert!(list_result.is_err()); + let error_msg = list_result.unwrap_err().to_string(); + assert!( + error_msg.contains("Cannot map incompatible types"), + "Expected type incompatibility error during file listing, got: {error_msg}" + ); + + Ok(()) + } + + #[derive(Debug, Copy, Clone)] + enum MapSchemaError { + TypeIncompatible, + GeneralFailure, + InvalidProjection, + } + + #[derive(Debug)] + struct FailingMapSchemaAdapterFactory { + error_type: MapSchemaError, + } + + impl SchemaAdapterFactory for FailingMapSchemaAdapterFactory { + fn create( + &self, + projected_table_schema: SchemaRef, + _table_schema: SchemaRef, + ) -> Box { + Box::new(FailingMapSchemaAdapter { + schema: projected_table_schema, + error_type: self.error_type, + }) + } + } + + #[derive(Debug)] + struct FailingMapSchemaAdapter { + schema: SchemaRef, + error_type: MapSchemaError, + } + + impl SchemaAdapter for FailingMapSchemaAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.schema.field(index); + file_schema.fields.find(field.name()).map(|(i, _)| i) + } + + fn map_schema( + &self, + _file_schema: &Schema, + ) -> Result<(Arc, Vec)> { + // Always fail with different error types based on the configured error_type + match self.error_type { + MapSchemaError::TypeIncompatible => { + plan_err!( + "Cannot map incompatible types: Boolean cannot be cast to Utf8" + ) + } + MapSchemaError::GeneralFailure => { + plan_err!("Schema adapter mapping failed due to internal error") + } + MapSchemaError::InvalidProjection => { + plan_err!("Invalid projection in schema mapping: column index out of 
bounds") + } + } + } + } + + #[derive(Debug)] + struct NullStatsAdapterFactory; + + impl SchemaAdapterFactory for NullStatsAdapterFactory { + fn create( + &self, + projected_table_schema: SchemaRef, + _table_schema: SchemaRef, + ) -> Box { + Box::new(NullStatsAdapter { + schema: projected_table_schema, + }) + } + } + + #[derive(Debug)] + struct NullStatsAdapter { + schema: SchemaRef, + } + + impl SchemaAdapter for NullStatsAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.schema.field(index); + file_schema.fields.find(field.name()).map(|(i, _)| i) + } + + fn map_schema( + &self, + file_schema: &Schema, + ) -> Result<(Arc, Vec)> { + let projection = (0..file_schema.fields().len()).collect(); + Ok((Arc::new(NullStatsMapper {}), projection)) + } + } + + #[derive(Debug)] + struct NullStatsMapper; + + impl SchemaMapper for NullStatsMapper { + fn map_batch(&self, batch: RecordBatch) -> Result { + Ok(batch) + } + + fn map_column_statistics( + &self, + stats: &[ColumnStatistics], + ) -> Result> { + Ok(stats + .iter() + .map(|s| { + let mut s = s.clone(); + s.null_count = DUMMY_NULL_COUNT; + s + }) + .collect()) + } + } + + /// Helper function to create a test ListingTable with JSON format and custom schema adapter factory + fn create_test_listing_table_with_json_and_adapter( + ctx: &SessionContext, + collect_stat: bool, + schema_adapter_factory: Arc, + ) -> Result { + let path = "table/file.json"; + register_test_store(ctx, &[(path, 10)]); + + let format = JsonFormat::default(); + let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(collect_stat); + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + + let config = ListingTableConfig::new(table_path) + .with_listing_options(opt) + .with_schema(Arc::new(schema)) + .with_schema_adapter_factory(schema_adapter_factory); + + ListingTable::try_new(config) + } } diff 
--git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 580fa4be47afb..80dcdc1f34626 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -128,9 +128,21 @@ impl TableProviderFactory for ListingTableFactory { // if the folder then rewrite a file path as 'path/*.parquet' // to only read the files the reader can understand if table_path.is_folder() && table_path.get_glob().is_none() { - table_path = table_path.with_glob( - format!("*.{}", cmd.file_type.to_lowercase()).as_ref(), - )?; + // Since there are no files yet to infer an actual extension, + // derive the pattern based on compression type. + // So for gzipped CSV the pattern is `*.csv.gz` + let glob = match options.format.compression_type() { + Some(compression) => { + match options.format.get_ext_with_compression(&compression) { + // Use glob based on `FileFormat` extension + Ok(ext) => format!("*.{ext}"), + // Fallback to `file_type`, if not supported by `FileFormat` + Err(_) => format!("*.{}", cmd.file_type.to_lowercase()), + } + } + None => format!("*.{}", cmd.file_type.to_lowercase()), + }; + table_path = table_path.with_glob(glob.as_ref())?; } let schema = options.infer_schema(session_state, &table_path).await?; let df_schema = Arc::clone(&schema).to_dfschema()?; @@ -175,6 +187,7 @@ fn get_extension(path: &str) -> String { #[cfg(test)] mod tests { + use glob::Pattern; use std::collections::HashMap; use super::*; @@ -182,6 +195,7 @@ mod tests { datasource::file_format::csv::CsvFormat, execution::context::SessionContext, }; + use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{Constraints, DFSchema, TableReference}; #[tokio::test] @@ -264,4 +278,101 @@ mod tests { let listing_options = listing_table.options(); assert_eq!(".tbl", listing_options.file_extension); } + + /// Validates that CreateExternalTable with compression + /// 
searches for gzipped files in a directory location + #[tokio::test] + async fn test_create_using_folder_with_compression() { + let dir = tempfile::tempdir().unwrap(); + + let factory = ListingTableFactory::new(); + let context = SessionContext::new(); + let state = context.state(); + let name = TableReference::bare("foo"); + + let mut options = HashMap::new(); + options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); + options.insert("format.has_header".into(), "true".into()); + options.insert("format.compression".into(), "gzip".into()); + let cmd = CreateExternalTable { + name, + location: dir.path().to_str().unwrap().to_string(), + file_type: "csv".to_string(), + schema: Arc::new(DFSchema::empty()), + table_partition_cols: vec![], + if_not_exists: false, + temporary: false, + definition: None, + order_exprs: vec![], + unbounded: false, + options, + constraints: Constraints::default(), + column_defaults: HashMap::new(), + }; + let table_provider = factory.create(&state, &cmd).await.unwrap(); + let listing_table = table_provider + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify compression is used + let format = listing_table.options().format.clone(); + let csv_format = format.as_any().downcast_ref::().unwrap(); + let csv_options = csv_format.options().clone(); + assert_eq!(csv_options.compression, CompressionTypeVariant::GZIP); + + let listing_options = listing_table.options(); + assert_eq!("", listing_options.file_extension); + // Glob pattern is set to search for gzipped files + let table_path = listing_table.table_paths().first().unwrap(); + assert_eq!( + table_path.get_glob().clone().unwrap(), + Pattern::new("*.csv.gz").unwrap() + ); + } + + /// Validates that CreateExternalTable without compression + /// searches for normal files in a directory location + #[tokio::test] + async fn test_create_using_folder_without_compression() { + let dir = tempfile::tempdir().unwrap(); + + let factory = ListingTableFactory::new(); + let context 
= SessionContext::new(); + let state = context.state(); + let name = TableReference::bare("foo"); + + let mut options = HashMap::new(); + options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); + options.insert("format.has_header".into(), "true".into()); + let cmd = CreateExternalTable { + name, + location: dir.path().to_str().unwrap().to_string(), + file_type: "csv".to_string(), + schema: Arc::new(DFSchema::empty()), + table_partition_cols: vec![], + if_not_exists: false, + temporary: false, + definition: None, + order_exprs: vec![], + unbounded: false, + options, + constraints: Constraints::default(), + column_defaults: HashMap::new(), + }; + let table_provider = factory.create(&state, &cmd).await.unwrap(); + let listing_table = table_provider + .as_any() + .downcast_ref::() + .unwrap(); + + let listing_options = listing_table.options(); + assert_eq!("", listing_options.file_extension); + // Glob pattern is set to search for gzipped files + let table_path = listing_table.table_paths().first().unwrap(); + assert_eq!( + table_path.get_glob().clone().unwrap(), + Pattern::new("*.csv").unwrap() + ); + } } diff --git a/datafusion/core/src/datasource/memory_test.rs b/datafusion/core/src/datasource/memory_test.rs index 381000ab8ee1e..c16837c73b4f1 100644 --- a/datafusion/core/src/datasource/memory_test.rs +++ b/datafusion/core/src/datasource/memory_test.rs @@ -130,12 +130,15 @@ mod tests { .scan(&session_ctx.state(), Some(&projection), &[], None) .await { - Err(DataFusionError::ArrowError(ArrowError::SchemaError(e), _)) => { - assert_eq!( - "\"project index 4 out of bounds, max field 3\"", - format!("{e:?}") - ) - } + Err(DataFusionError::ArrowError(err, _)) => match err.as_ref() { + ArrowError::SchemaError(e) => { + assert_eq!( + "\"project index 4 out of bounds, max field 3\"", + format!("{e:?}") + ) + } + _ => panic!("unexpected error"), + }, res => panic!("Scan should failed on invalid projection, got {res:?}"), }; @@ -443,7 +446,7 @@ mod tests { 
.unwrap_err(); // Ensure that there is a descriptive error message assert_eq!( - "Error during planning: Cannot insert into MemTable with zero partitions", + "Error during planning: No partitions provided, expected at least one partition", experiment_result.strip_backtrace() ); Ok(()) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index dcab71f324404..46b4781865dbb 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -226,7 +226,7 @@ where /// # use datafusion::execution::SessionStateBuilder; /// # use datafusion_execution::runtime_env::RuntimeEnvBuilder; /// // Configure a 4k batch size -/// let config = SessionConfig::new() .with_batch_size(4 * 1024); +/// let config = SessionConfig::new().with_batch_size(4 * 1024); /// /// // configure a memory limit of 1GB with 20% slop /// let runtime_env = RuntimeEnvBuilder::new() diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 9d3d5e645890e..81276bca77cc0 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -434,7 +434,7 @@ impl SessionState { .with_dialect(dialect.as_ref()) .with_recursion_limit(recursion_limit) .build()? 
- .parse_expr()?; + .parse_into_expr()?; Ok(expr) } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 90cc0b572fefa..e1f41543240b8 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -93,6 +93,7 @@ use datafusion_physical_plan::execution_plan::InvariantLevel; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::recursive_query::RecursiveQueryExec; use datafusion_physical_plan::unnest::ListUnnest; +use datafusion_sql::TableReference; use sqlparser::ast::NullTreatment; use async_trait::async_trait; @@ -504,6 +505,7 @@ impl DefaultPhysicalPlanner { file_type, partition_by, options: source_option_tuples, + output_schema: _, }) => { let original_url = output_url.clone(); let input_exec = children.one()?; @@ -533,6 +535,14 @@ impl DefaultPhysicalPlanner { let sink_format = file_type_to_format(file_type)? .create(session_state, source_option_tuples)?; + // Determine extension based on format extension and compression + let file_extension = match sink_format.compression_type() { + Some(compression_type) => sink_format + .get_ext_with_compression(&compression_type) + .unwrap_or_else(|_| sink_format.get_ext()), + None => sink_format.get_ext(), + }; + // Set file sink related options let config = FileSinkConfig { original_url, @@ -543,11 +553,18 @@ impl DefaultPhysicalPlanner { table_partition_cols, insert_op: InsertOp::Append, keep_partition_by_columns, - file_extension: sink_format.get_ext(), + file_extension, }; + let ordering = input_exec.properties().output_ordering().cloned(); + sink_format - .create_writer_physical_plan(input_exec, session_state, config, None) + .create_writer_physical_plan( + input_exec, + session_state, + config, + ordering.map(Into::into), + ) .await? 
} LogicalPlan::Dml(DmlStatement { @@ -931,8 +948,8 @@ impl DefaultPhysicalPlanner { // 2 Children LogicalPlan::Join(Join { - left, - right, + left: original_left, + right: original_right, on: keys, filter, join_type, @@ -955,23 +972,25 @@ impl DefaultPhysicalPlanner { let (left, left_col_keys, left_projected) = wrap_projection_for_join_if_necessary( &left_keys, - left.as_ref().clone(), + original_left.as_ref().clone(), )?; let (right, right_col_keys, right_projected) = wrap_projection_for_join_if_necessary( &right_keys, - right.as_ref().clone(), + original_right.as_ref().clone(), )?; let column_on = (left_col_keys, right_col_keys); let left = Arc::new(left); let right = Arc::new(right); - let new_join = LogicalPlan::Join(Join::try_new_with_project_input( + let (new_join, requalified) = Join::try_new_with_project_input( node, Arc::clone(&left), Arc::clone(&right), column_on, - )?); + )?; + + let new_join = LogicalPlan::Join(new_join); // If inputs were projected then create ExecutionPlan for these new // LogicalPlan nodes. @@ -1004,8 +1023,24 @@ impl DefaultPhysicalPlanner { // Remove temporary projected columns if left_projected || right_projected { - let final_join_result = - join_schema.iter().map(Expr::from).collect::>(); + // Re-qualify the join schema only if the inputs were previously requalified in + // `try_new_with_project_input`. This ensures that when building the Projection + // it can correctly resolve field nullability and data types + // by disambiguating fields from the left and right sides of the join. + let qualified_join_schema = if requalified { + Arc::new(qualify_join_schema_sides( + join_schema, + original_left, + original_right, + )?) 
+ } else { + Arc::clone(join_schema) + }; + + let final_join_result = qualified_join_schema + .iter() + .map(Expr::from) + .collect::>(); let projection = LogicalPlan::Projection(Projection::try_new( final_join_result, Arc::new(new_join), @@ -1323,6 +1358,9 @@ impl DefaultPhysicalPlanner { physical_name(expr), ))?])), } + } else if group_expr.is_empty() { + // No GROUP BY clause - create empty PhysicalGroupBy + Ok(PhysicalGroupBy::new(vec![], vec![], vec![])) } else { Ok(PhysicalGroupBy::new_single( group_expr @@ -1502,6 +1540,64 @@ fn get_null_physical_expr_pair( Ok((Arc::new(null_value), physical_name)) } +/// Qualifies the fields in a join schema with "left" and "right" qualifiers +/// without mutating the original schema. This function should only be used when +/// the join inputs have already been requalified earlier in `try_new_with_project_input`. +/// +/// The purpose is to avoid ambiguity errors later in planning (e.g., in nullability or data type resolution) +/// when converting expressions to fields. +fn qualify_join_schema_sides( + join_schema: &DFSchema, + left: &LogicalPlan, + right: &LogicalPlan, +) -> Result { + let left_fields = left.schema().fields(); + let right_fields = right.schema().fields(); + let join_fields = join_schema.fields(); + + // Validate lengths + if join_fields.len() != left_fields.len() + right_fields.len() { + return internal_err!( + "Join schema field count must match left and right field count." 
+ ); + } + + // Validate field names match + for (i, (field, expected)) in join_fields + .iter() + .zip(left_fields.iter().chain(right_fields.iter())) + .enumerate() + { + if field.name() != expected.name() { + return internal_err!( + "Field name mismatch at index {}: expected '{}', found '{}'", + i, + expected.name(), + field.name() + ); + } + } + + // qualify sides + let qualifiers = join_fields + .iter() + .enumerate() + .map(|(i, _)| { + if i < left_fields.len() { + Some(TableReference::Bare { + table: Arc::from("left"), + }) + } else { + Some(TableReference::Bare { + table: Arc::from("right"), + }) + } + }) + .collect(); + + join_schema.with_field_specific_qualified_schema(qualifiers) +} + fn get_physical_expr_pair( expr: &Expr, input_dfschema: &DFSchema, @@ -1651,14 +1747,11 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_bys) = { - let order_bys = match order_by { - Some(exprs) => create_physical_sort_exprs( - exprs, - logical_input_schema, - execution_props, - )?, - None => vec![], - }; + let order_bys = create_physical_sort_exprs( + order_by, + logical_input_schema, + execution_props, + )?; let agg_expr = AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec()) @@ -1766,6 +1859,7 @@ impl DefaultPhysicalPlanner { stringified_plans.push(StringifiedPlan::new( FinalPhysicalPlan, displayable(optimized_plan.as_ref()) + .set_tree_maximum_render_width(config.tree_maximum_render_width) .tree_render() .to_string(), )); diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 8719a16f4919f..68f83e7f1f115 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -38,6 +38,7 @@ use crate::test_util::{aggr_test_schema, arrow_test_data}; use arrow::array::{self, Array, ArrayRef, Decimal128Builder, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; +#[cfg(feature = "compression")] use 
datafusion_common::DataFusionError; use datafusion_datasource::source::DataSourceExec; diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index ed8474bbfc812..761f60d645d24 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -24,8 +24,8 @@ use futures::stream::BoxStream; use futures::FutureExt; use object_store::{ memory::InMemory, path::Path, Error, GetOptions, GetResult, ListResult, - MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, - PutResult, + MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, + PutPayload, PutResult, }; use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; @@ -118,7 +118,7 @@ impl ObjectStore for BlockingObjectStore { async fn put_multipart_opts( &self, location: &Path, - opts: PutMultipartOpts, + opts: PutMultipartOptions, ) -> object_store::Result> { self.inner.put_multipart_opts(location, opts).await } diff --git a/datafusion/core/tests/data/tpch_customer_small.parquet b/datafusion/core/tests/data/tpch_customer_small.parquet new file mode 100644 index 0000000000000..3d5f73ef3a066 Binary files /dev/null and b/datafusion/core/tests/data/tpch_customer_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_lineitem_small.parquet b/datafusion/core/tests/data/tpch_lineitem_small.parquet new file mode 100644 index 0000000000000..5e98706669d3b Binary files /dev/null and b/datafusion/core/tests/data/tpch_lineitem_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_nation_small.parquet b/datafusion/core/tests/data/tpch_nation_small.parquet new file mode 100644 index 0000000000000..99da99594cf89 Binary files /dev/null and b/datafusion/core/tests/data/tpch_nation_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_orders_small.parquet b/datafusion/core/tests/data/tpch_orders_small.parquet new file mode 100644 index 0000000000000..79e043137caf6 
Binary files /dev/null and b/datafusion/core/tests/data/tpch_orders_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_part_small.parquet b/datafusion/core/tests/data/tpch_part_small.parquet new file mode 100644 index 0000000000000..d8e1d7d680aa2 Binary files /dev/null and b/datafusion/core/tests/data/tpch_part_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_partsupp_small.parquet b/datafusion/core/tests/data/tpch_partsupp_small.parquet new file mode 100644 index 0000000000000..711d58dda7493 Binary files /dev/null and b/datafusion/core/tests/data/tpch_partsupp_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_region_small.parquet b/datafusion/core/tests/data/tpch_region_small.parquet new file mode 100644 index 0000000000000..5e00a1f6da1d9 Binary files /dev/null and b/datafusion/core/tests/data/tpch_region_small.parquet differ diff --git a/datafusion/core/tests/data/tpch_supplier_small.parquet b/datafusion/core/tests/data/tpch_supplier_small.parquet new file mode 100644 index 0000000000000..18323395fcbed Binary files /dev/null and b/datafusion/core/tests/data/tpch_supplier_small.parquet differ diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 8d60dbea3d019..36a1161541756 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -68,6 +68,7 @@ use datafusion_common::{ TableReference, UnnestOptions, }; use datafusion_common_runtime::SpawnedTask; +use datafusion_datasource::file_format::format_as_file_type; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{FieldMetadata, GroupingSet, Sort, WindowFunction}; @@ -75,8 +76,8 @@ use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, 
LogicalPlan, - ScalarFunctionImplementation, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, + LogicalPlanBuilder, ScalarFunctionImplementation, SortExpr, WindowFrame, + WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::Partitioning; @@ -1360,6 +1361,36 @@ async fn except() -> Result<()> { Ok(()) } +#[tokio::test] +async fn except_distinct() -> Result<()> { + let df = test_table().await?.select_columns(&["c1", "c3"])?; + let d2 = df.clone(); + let plan = df.except_distinct(d2)?; + let result = plan.logical_plan().clone(); + let expected = create_plan( + "SELECT c1, c3 FROM aggregate_test_100 + EXCEPT DISTINCT SELECT c1, c3 FROM aggregate_test_100", + ) + .await?; + assert_same_plan(&result, &expected); + Ok(()) +} + +#[tokio::test] +async fn intersect_distinct() -> Result<()> { + let df = test_table().await?.select_columns(&["c1", "c3"])?; + let d2 = df.clone(); + let plan = df.intersect_distinct(d2)?; + let result = plan.logical_plan().clone(); + let expected = create_plan( + "SELECT c1, c3 FROM aggregate_test_100 + INTERSECT DISTINCT SELECT c1, c3 FROM aggregate_test_100", + ) + .await?; + assert_same_plan(&result, &expected); + Ok(()) +} + #[tokio::test] async fn register_table() -> Result<()> { let df = test_table().await?.select_columns(&["c1", "c12"])?; @@ -2787,20 +2818,20 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), - @r###" - 
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt32(6)), end_bound: 
Following(UInt32(2)), is_causal: false }], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - "### + @r#" + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | + | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 
PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + "# ); let df_results = ctx @@ -2821,20 +2852,20 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), - @r###" - +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - 
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt32(6)), end_bound: Following(UInt32(2)), is_causal: false }], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - 
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - "### + @r#" + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | + | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) 
ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + "# ); Ok(()) @@ -4851,7 +4882,7 @@ async fn use_var_provider() -> Result<()> { Field::new("bar", DataType::Int64, false), ])); - let mem_table = Arc::new(MemTable::try_new(schema, vec![])?); + let mem_table = Arc::new(MemTable::try_new(schema, vec![vec![]])?); let config = SessionConfig::new() .with_target_partitions(4) @@ -6137,3 +6168,86 @@ async fn test_dataframe_macro() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_copy_schema() -> Result<()> { + let tmp_dir = TempDir::new()?; + + let session_state = SessionStateBuilder::new_with_default_features().build(); + + let session_ctx = SessionContext::new_with_state(session_state); + + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)])); + + // Create and register the source table with the provided schema and data + let source_table = Arc::new(MemTable::try_new(schema.clone(), vec![vec![]])?); + session_ctx.register_table("source_table", source_table.clone())?; + + let target_path = tmp_dir.path().join("target.csv"); + + let query = format!( + "COPY source_table TO '{:?}' STORED AS csv", + target_path.to_str().unwrap() + ); + + let result = session_ctx.sql(&query).await?; + 
assert_logical_expr_schema_eq_physical_expr_schema(result).await?; + Ok(()) +} + +#[tokio::test] +async fn test_copy_to_preserves_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + + let session_state = SessionStateBuilder::new_with_default_features().build(); + let session_ctx = SessionContext::new_with_state(session_state); + + let target_path = tmp_dir.path().join("target_ordered.csv"); + let csv_file_format = session_ctx + .state() + .get_file_format_factory("csv") + .map(format_as_file_type) + .unwrap(); + + let ordered_select_plan = LogicalPlanBuilder::values(vec![ + vec![lit(1u64)], + vec![lit(10u64)], + vec![lit(20u64)], + vec![lit(100u64)], + ])? + .sort(vec![SortExpr::new(col("column1"), false, true)])? + .build()?; + + let copy_to_plan = LogicalPlanBuilder::copy_to( + ordered_select_plan, + target_path.to_str().unwrap().to_string(), + csv_file_format, + HashMap::new(), + vec![], + )? + .build()?; + + let union_side_branch = LogicalPlanBuilder::values(vec![vec![lit(1u64)]])?.build()?; + let union_plan = LogicalPlanBuilder::from(copy_to_plan) + .union(union_side_branch)? 
+ .build()?; + + let frame = session_ctx.execute_logical_plan(union_plan).await?; + let physical_plan = frame.create_physical_plan().await?; + + let physical_plan_format = + displayable(physical_plan.as_ref()).indent(true).to_string(); + + // Expect that input to the DataSinkExec is sorted correctly + assert_snapshot!( + physical_plan_format, + @r###" + UnionExec + DataSinkExec: sink=CsvSink(file_groups=[]) + SortExec: expr=[column1@0 DESC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[1] + DataSourceExec: partitions=1, partition_sizes=[1] + "### + ); + Ok(()) +} diff --git a/datafusion/core/tests/execution/datasource_split.rs b/datafusion/core/tests/execution/datasource_split.rs new file mode 100644 index 0000000000000..0b90c6f326168 --- /dev/null +++ b/datafusion/core/tests/execution/datasource_split.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow::{ + array::{ArrayRef, Int32Array}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, +}; +use datafusion_datasource::memory::MemorySourceConfig; +use datafusion_execution::TaskContext; +use datafusion_physical_plan::{common::collect, ExecutionPlan}; +use std::sync::Arc; + +/// Helper function to create a memory source with the given batch size and collect all batches +async fn create_and_collect_batches( + batch_size: usize, +) -> datafusion_common::Result> { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + let array = Int32Array::from_iter_values(0..batch_size as i32); + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array) as ArrayRef])?; + let exec = MemorySourceConfig::try_new_exec(&[vec![batch]], schema, None)?; + let ctx = Arc::new(TaskContext::default()); + let stream = exec.execute(0, ctx)?; + collect(stream).await +} + +/// Helper function to create a memory source with multiple batches and collect all results +async fn create_and_collect_multiple_batches( + input_batches: Vec, +) -> datafusion_common::Result> { + let schema = input_batches[0].schema(); + let exec = MemorySourceConfig::try_new_exec(&[input_batches], schema, None)?; + let ctx = Arc::new(TaskContext::default()); + let stream = exec.execute(0, ctx)?; + collect(stream).await +} + +#[tokio::test] +async fn datasource_splits_large_batches() -> datafusion_common::Result<()> { + let batch_size = 20000; + let batches = create_and_collect_batches(batch_size).await?; + + assert!(batches.len() > 1); + let max = batches.iter().map(|b| b.num_rows()).max().unwrap(); + assert!( + max <= datafusion_execution::config::SessionConfig::new() + .options() + .execution + .batch_size + ); + let total: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total, batch_size); + Ok(()) +} + +#[tokio::test] +async fn datasource_exact_batch_size_no_split() -> datafusion_common::Result<()> { + let session_config = 
datafusion_execution::config::SessionConfig::new(); + let configured_batch_size = session_config.options().execution.batch_size; + + let batches = create_and_collect_batches(configured_batch_size).await?; + + // Should not split when exactly equal to batch_size + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_rows(), configured_batch_size); + Ok(()) +} + +#[tokio::test] +async fn datasource_small_batch_no_split() -> datafusion_common::Result<()> { + // Test with batch smaller than the batch size (8192) + let small_batch_size = 512; // Less than 8192 + + let batches = create_and_collect_batches(small_batch_size).await?; + + // Should not split small batches below the batch size + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_rows(), small_batch_size); + Ok(()) +} + +#[tokio::test] +async fn datasource_empty_batch_clean_termination() -> datafusion_common::Result<()> { + let batches = create_and_collect_batches(0).await?; + + // Empty batch should result in one empty batch + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_rows(), 0); + Ok(()) +} + +#[tokio::test] +async fn datasource_multiple_empty_batches() -> datafusion_common::Result<()> { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + let empty_array = Int32Array::from_iter_values(std::iter::empty::()); + let empty_batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(empty_array) as ArrayRef])?; + + // Create multiple empty batches + let input_batches = vec![empty_batch.clone(), empty_batch.clone(), empty_batch]; + let batches = create_and_collect_multiple_batches(input_batches).await?; + + // Should preserve empty batches without issues + assert_eq!(batches.len(), 3); + for batch in &batches { + assert_eq!(batch.num_rows(), 0); + } + Ok(()) +} diff --git a/datafusion/core/tests/execution/logical_plan.rs b/datafusion/core/tests/execution/logical_plan.rs index f5a8a30e01307..da8f9807225dd 100644 --- 
a/datafusion/core/tests/execution/logical_plan.rs +++ b/datafusion/core/tests/execution/logical_plan.rs @@ -68,7 +68,7 @@ async fn count_only_nulls() -> Result<()> { args: vec![input_col_ref], distinct: false, filter: None, - order_by: None, + order_by: vec![], null_treatment: None, }, })], diff --git a/datafusion/core/tests/execution/mod.rs b/datafusion/core/tests/execution/mod.rs index f367a29017a34..8770b2a201051 100644 --- a/datafusion/core/tests/execution/mod.rs +++ b/datafusion/core/tests/execution/mod.rs @@ -16,4 +16,5 @@ // under the License. mod coop; +mod datasource_split; mod logical_plan; diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index cfb3c1c6a1b98..5642326514241 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -171,7 +171,7 @@ impl AggregationFuzzer { let datasets = self .dataset_generator .generate() - .expect("should success to generate dataset"); + .expect("should succeed to generate dataset"); // Then for each of them, we random select a test sql for it let query_groups = datasets @@ -216,16 +216,16 @@ impl AggregationFuzzer { // Generate the baseline context, and get the baseline result firstly let baseline_ctx_with_params = ctx_generator .generate_baseline() - .expect("should success to generate baseline session context"); + .expect("should succeed to generate baseline session context"); let baseline_result = run_sql(&sql, &baseline_ctx_with_params.ctx) .await - .expect("should success to run baseline sql"); + .expect("should succeed to run baseline sql"); let baseline_result = Arc::new(baseline_result); // Generate test tasks for _ in 0..CTX_GEN_ROUNDS { let ctx_with_params = ctx_generator .generate() - .expect("should success to generate session context"); + .expect("should succeed to generate session context"); let task = AggregationFuzzTestTask { 
dataset_ref: dataset_ref.clone(), expected_result: baseline_result.clone(), diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 0d500fd7f441d..171839b390ffa 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -21,6 +21,7 @@ use crate::fuzz_cases::equivalence::utils::{ is_table_same_after_sort, TestScalarUDF, }; use arrow::compute::SortOptions; +use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::{ @@ -110,6 +111,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + Arc::new(ConfigOptions::default()), )?); let a_plus_b = Arc::new(BinaryExpr::new( col("a", &test_schema)?, diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index d776796a1b752..f783ab1cc1b41 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -20,6 +20,7 @@ use crate::fuzz_cases::equivalence::utils::{ is_table_same_after_sort, TestScalarUDF, }; use arrow::compute::SortOptions; +use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; @@ -49,6 +50,7 @@ fn project_orderings_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + Arc::new(ConfigOptions::default()), )?); // a + b let a_plus_b = Arc::new(BinaryExpr::new( @@ -122,6 +124,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + Arc::new(ConfigOptions::default()), )?) 
as PhysicalExprRef; // a + b let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index e35ce3a6f8c99..382c4da943219 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -28,6 +28,7 @@ use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr::{LexOrdering, ScalarFunctionExpr}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_common::config::ConfigOptions; use itertools::Itertools; #[test] @@ -49,6 +50,7 @@ fn test_find_longest_permutation_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + Arc::new(ConfigOptions::default()), )?) as _; let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs index 4eac1482ad3f9..e7f63b5351046 100644 --- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs +++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs @@ -724,15 +724,13 @@ impl RecordBatchGenerator { { // We generate just num_distinct values because they will be reused by different keys let mut array_gen_rng = array_gen_rng; - + debug_assert!((0.0..=1.0).contains(&null_pct)); let values = Self::generate_array_of_type_inner( &ColumnDescr::new("values", *value_type.clone()), num_distinct, batch_gen_rng, array_gen_rng.clone(), - // Once https://github.com/apache/datafusion/issues/16228 is fixed - // we can also generate nulls in values - 0.0, // null values are generated on the key level + null_pct, // generate some null values ); match key_type.as_ref() { diff --git a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs index 
833af04680dbb..e3d53a31c5493 100644 --- a/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs +++ b/datafusion/core/tests/integration_tests/schema_adapter_integration_tests.rs @@ -148,6 +148,70 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> { Ok(()) } +#[cfg(feature = "parquet")] +#[tokio::test] +async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter() -> Result<()> { + // Create a temporary directory for our test file + let tmp_dir = TempDir::new()?; + let file_path = tmp_dir.path().join("test.parquet"); + let file_path_str = file_path.to_str().unwrap(); + + // Create test data + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(arrow::array::Int32Array::from(vec![1, 2, 3])), + Arc::new(arrow::array::StringArray::from(vec!["a", "b", "c"])), + ], + )?; + + // Write test parquet file + let file = std::fs::File::create(file_path_str)?; + let props = WriterProperties::builder().build(); + let mut writer = ArrowWriter::try_new(file, schema.clone(), Some(props))?; + writer.write(&batch)?; + writer.close()?; + + // Create a session context + let ctx = SessionContext::new(); + + // Create a ParquetSource with the adapter factory + let source = ParquetSource::default() + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {})); + + // Create a scan config + let config = FileScanConfigBuilder::new( + ObjectStoreUrl::parse(&format!("file://{}", file_path_str))?, + schema.clone(), + ) + .with_source(source) + .build(); + + // Create a data source executor + let exec = DataSourceExec::from_data_source(config); + + // Collect results + let task_ctx = ctx.task_ctx(); + let stream = exec.execute(0, task_ctx)?; + let batches = datafusion::physical_plan::common::collect(stream).await?; + + // There should be one batch + 
assert_eq!(batches.len(), 1); + + // Verify the schema has uppercase column names + let result_schema = batches[0].schema(); + assert_eq!(result_schema.field(0).name(), "ID"); + assert_eq!(result_schema.field(1).name(), "NAME"); + + Ok(()) +} + + #[tokio::test] async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // This test verifies that the same schema adapter factory can be reused diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 2b262d4326cc2..3cc177feac0fc 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -1084,9 +1084,9 @@ fn batches_byte_size(batches: &[RecordBatch]) -> usize { } #[derive(Debug)] -struct DummyStreamPartition { - schema: SchemaRef, - batches: Vec, +pub(crate) struct DummyStreamPartition { + pub(crate) schema: SchemaRef, + pub(crate) batches: Vec, } impl PartitionStream for DummyStreamPartition { diff --git a/datafusion/core/tests/parquet/encryption.rs b/datafusion/core/tests/parquet/encryption.rs index 203c985428bc0..8e90b9aaa9551 100644 --- a/datafusion/core/tests/parquet/encryption.rs +++ b/datafusion/core/tests/parquet/encryption.rs @@ -74,6 +74,7 @@ pub fn write_batches( Ok(num_rows) } +#[cfg(feature = "parquet_encryption")] #[tokio::test] async fn round_trip_encryption() { let ctx: SessionContext = SessionContext::new(); diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 94d6d152a3847..4f9dde08a692c 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -50,6 +50,7 @@ mod filter_pushdown; mod page_pruning; mod row_group_pruning; mod schema; +mod schema_adapter; mod schema_coercion; mod utils; diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 9da879a32f6b5..5b37c55c09e41 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ 
b/datafusion/core/tests/parquet/page_pruning.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use crate::parquet::Unit::Page; use crate::parquet::{ContextWithParquet, Scenario}; +use arrow::array::RecordBatch; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::listing::PartitionedFile; @@ -40,7 +41,11 @@ use futures::StreamExt; use object_store::path::Path; use object_store::ObjectMeta; -async fn get_parquet_exec(state: &SessionState, filter: Expr) -> DataSourceExec { +async fn get_parquet_exec( + state: &SessionState, + filter: Expr, + pushdown_filters: bool, +) -> DataSourceExec { let object_store_url = ObjectStoreUrl::local_filesystem(); let store = state.runtime_env().object_store(&object_store_url).unwrap(); @@ -78,7 +83,8 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr) -> DataSourceExec let source = Arc::new( ParquetSource::default() .with_predicate(predicate) - .with_enable_page_index(true), + .with_enable_page_index(true) + .with_pushdown_filters(pushdown_filters), ); let base_config = FileScanConfigBuilder::new(object_store_url, schema, source) .with_file(partitioned_file) @@ -87,38 +93,44 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr) -> DataSourceExec DataSourceExec::new(Arc::new(base_config)) } +async fn get_filter_results( + state: &SessionState, + filter: Expr, + pushdown_filters: bool, +) -> Vec { + let parquet_exec = get_parquet_exec(state, filter, pushdown_filters).await; + let task_ctx = state.task_ctx(); + let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); + let mut batches = Vec::new(); + while let Some(Ok(batch)) = results.next().await { + batches.push(batch); + } + batches +} + #[tokio::test] async fn page_index_filter_one_col() { let session_ctx = SessionContext::new(); let state = session_ctx.state(); - let task_ctx = state.task_ctx(); // 1.create filter month == 1; let filter = 
col("month").eq(lit(1_i32)); - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - + let batches = get_filter_results(&state, filter.clone(), false).await; // `month = 1` from the page index should create below RowSelection // vec.push(RowSelector::select(312)); // vec.push(RowSelector::skip(3330)); // vec.push(RowSelector::select(339)); // vec.push(RowSelector::skip(3319)); // total 651 row - assert_eq!(batch.num_rows(), 651); + assert_eq!(batches[0].num_rows(), 651); + + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 620); // 2. create filter month == 1 or month == 2; let filter = col("month").eq(lit(1_i32)).or(col("month").eq(lit(2_i32))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - + let batches = get_filter_results(&state, filter.clone(), false).await; // `month = 1` or `month = 2` from the page index should create below RowSelection // vec.push(RowSelector::select(312)); // vec.push(RowSelector::skip(900)); @@ -128,95 +140,78 @@ async fn page_index_filter_one_col() { // vec.push(RowSelector::skip(873)); // vec.push(RowSelector::select(318)); // vec.push(RowSelector::skip(2128)); - assert_eq!(batch.num_rows(), 1281); + assert_eq!(batches[0].num_rows(), 1281); + + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 1180); // 3. 
create filter month == 1 and month == 12; let filter = col("month") .eq(lit(1_i32)) .and(col("month").eq(lit(12_i32))); + let batches = get_filter_results(&state, filter.clone(), false).await; + assert!(batches.is_empty()); - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await; - - assert!(batch.is_none()); + let batches = get_filter_results(&state, filter, true).await; + assert!(batches.is_empty()); // 4.create filter 0 < month < 2 ; let filter = col("month").gt(lit(0_i32)).and(col("month").lt(lit(2_i32))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - + let batches = get_filter_results(&state, filter.clone(), false).await; // should same with `month = 1` - assert_eq!(batch.num_rows(), 651); - - let session_ctx = SessionContext::new(); - let task_ctx = session_ctx.task_ctx(); + assert_eq!(batches[0].num_rows(), 651); + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 620); // 5.create filter date_string_col == "01/01/09"`; // Note this test doesn't apply type coercion so the literal must match the actual view type let filter = col("date_string_col").eq(lit(ScalarValue::new_utf8view("01/01/09"))); - let parquet_exec = get_parquet_exec(&state, filter).await; - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - let batch = results.next().await.unwrap().unwrap(); + let batches = get_filter_results(&state, filter.clone(), false).await; + assert_eq!(batches[0].num_rows(), 14); // there should only two pages match the filter // min max // page-20 0 01/01/09 01/02/09 // page-21 0 01/01/09 01/01/09 // each 7 rows - assert_eq!(batch.num_rows(), 14); + assert_eq!(batches[0].num_rows(), 14); + let batches = get_filter_results(&state, filter, 
true).await; + assert_eq!(batches[0].num_rows(), 10); } #[tokio::test] async fn page_index_filter_multi_col() { let session_ctx = SessionContext::new(); let state = session_ctx.state(); - let task_ctx = session_ctx.task_ctx(); // create filter month == 1 and year = 2009; let filter = col("month").eq(lit(1_i32)).and(col("year").eq(lit(2009))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - + let batches = get_filter_results(&state, filter.clone(), false).await; // `year = 2009` from the page index should create below RowSelection // vec.push(RowSelector::select(3663)); // vec.push(RowSelector::skip(3642)); // combine with `month = 1` total 333 row - assert_eq!(batch.num_rows(), 333); + assert_eq!(batches[0].num_rows(), 333); + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 310); // create filter (year = 2009 or id = 1) and month = 1; // this should only use `month = 1` to evaluate the page index. 
let filter = col("month") .eq(lit(1_i32)) .and(col("year").eq(lit(2009)).or(col("id").eq(lit(1)))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - assert_eq!(batch.num_rows(), 651); + let batches = get_filter_results(&state, filter.clone(), false).await; + assert_eq!(batches[0].num_rows(), 651); + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 310); // create filter (year = 2009 or id = 1) // this filter use two columns will not push down let filter = col("year").eq(lit(2009)).or(col("id").eq(lit(1))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - assert_eq!(batch.num_rows(), 7300); + let batches = get_filter_results(&state, filter.clone(), false).await; + assert_eq!(batches[0].num_rows(), 7300); + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 3650); // create filter (year = 2009 and id = 1) or (year = 2010) // this filter use two columns will not push down @@ -226,13 +221,10 @@ async fn page_index_filter_multi_col() { .eq(lit(2009)) .and(col("id").eq(lit(1))) .or(col("year").eq(lit(2010))); - - let parquet_exec = get_parquet_exec(&state, filter).await; - - let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); - - let batch = results.next().await.unwrap().unwrap(); - assert_eq!(batch.num_rows(), 7300); + let batches = get_filter_results(&state, filter.clone(), false).await; + assert_eq!(batches[0].num_rows(), 7300); + let batches = get_filter_results(&state, filter, true).await; + assert_eq!(batches[0].num_rows(), 3651); } async fn test_prune( diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs new 
file mode 100644 index 0000000000000..2bfd9bd6b842d --- /dev/null +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -0,0 +1,372 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{record_batch, RecordBatch, RecordBatchOptions}; +use arrow::compute::{cast_with_options, CastOptions}; +use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef}; +use bytes::{BufMut, BytesMut}; +use datafusion::assert_batches_eq; +use datafusion::common::Result; +use datafusion::datasource::listing::{ListingTable, ListingTableConfig}; +use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::{ColumnStatistics, ScalarValue}; +use datafusion_datasource::schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, SchemaMapper, +}; +use datafusion_datasource::ListingTableUrl; +use datafusion_execution::object_store::ObjectStoreUrl; +use datafusion_physical_expr::expressions::{self, Column}; +use datafusion_physical_expr::schema_rewriter::{ + DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, +}; +use 
datafusion_physical_expr::{DefaultPhysicalExprAdapter, PhysicalExpr}; +use itertools::Itertools; +use object_store::{memory::InMemory, path::Path, ObjectStore}; +use parquet::arrow::ArrowWriter; + +async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { + let mut out = BytesMut::new().writer(); + { + let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None).unwrap(); + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + let data = out.into_inner().freeze(); + store.put(&Path::from(path), data.into()).await.unwrap(); +} + +#[derive(Debug)] +struct CustomSchemaAdapterFactory; + +impl SchemaAdapterFactory for CustomSchemaAdapterFactory { + fn create( + &self, + projected_table_schema: SchemaRef, + _table_schema: SchemaRef, + ) -> Box { + Box::new(CustomSchemaAdapter { + logical_file_schema: projected_table_schema, + }) + } +} + +#[derive(Debug)] +struct CustomSchemaAdapter { + logical_file_schema: SchemaRef, +} + +impl SchemaAdapter for CustomSchemaAdapter { + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + for (idx, field) in file_schema.fields().iter().enumerate() { + if field.name() == self.logical_file_schema.field(index).name() { + return Some(idx); + } + } + None + } + + fn map_schema( + &self, + file_schema: &Schema, + ) -> Result<(Arc, Vec)> { + let projection = (0..file_schema.fields().len()).collect_vec(); + Ok(( + Arc::new(CustomSchemaMapper { + logical_file_schema: Arc::clone(&self.logical_file_schema), + }), + projection, + )) + } +} + +#[derive(Debug)] +struct CustomSchemaMapper { + logical_file_schema: SchemaRef, +} + +impl SchemaMapper for CustomSchemaMapper { + fn map_batch(&self, batch: RecordBatch) -> Result { + let mut output_columns = + Vec::with_capacity(self.logical_file_schema.fields().len()); + for field in self.logical_file_schema.fields() { + if let Some(array) = batch.column_by_name(field.name()) { + output_columns.push(cast_with_options( + array, + field.data_type(), + 
&CastOptions::default(), + )?); + } else { + // Create a new array with the default value for the field type + let default_value = match field.data_type() { + DataType::Int64 => ScalarValue::Int64(Some(0)), + DataType::Utf8 => ScalarValue::Utf8(Some("a".to_string())), + _ => unimplemented!("Unsupported data type: {:?}", field.data_type()), + }; + output_columns + .push(default_value.to_array_of_size(batch.num_rows()).unwrap()); + } + } + let batch = RecordBatch::try_new_with_options( + Arc::clone(&self.logical_file_schema), + output_columns, + &RecordBatchOptions::new().with_row_count(Some(batch.num_rows())), + ) + .unwrap(); + Ok(batch) + } + + fn map_column_statistics( + &self, + _file_col_statistics: &[ColumnStatistics], + ) -> Result> { + Ok(vec![ + ColumnStatistics::new_unknown(); + self.logical_file_schema.fields().len() + ]) + } +} + +// Implement a custom PhysicalExprAdapterFactory that fills in missing columns with the default value for the field type +#[derive(Debug)] +struct CustomPhysicalExprAdapterFactory; + +impl PhysicalExprAdapterFactory for CustomPhysicalExprAdapterFactory { + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + Arc::new(CustomPhysicalExprAdapter { + logical_file_schema: Arc::clone(&logical_file_schema), + physical_file_schema: Arc::clone(&physical_file_schema), + inner: Arc::new(DefaultPhysicalExprAdapter::new( + logical_file_schema, + physical_file_schema, + )), + }) + } +} + +#[derive(Debug, Clone)] +struct CustomPhysicalExprAdapter { + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + inner: Arc, +} + +impl PhysicalExprAdapter for CustomPhysicalExprAdapter { + fn rewrite(&self, mut expr: Arc) -> Result> { + expr = expr + .transform(|expr| { + if let Some(column) = expr.as_any().downcast_ref::() { + let field_name = column.name(); + if self + .physical_file_schema + .field_with_name(field_name) + .ok() + .is_none() + { + let field = self + .logical_file_schema 
+ .field_with_name(field_name) + .map_err(|_| { + datafusion_common::DataFusionError::Plan(format!( + "Field '{field_name}' not found in logical file schema", + )) + })?; + // If the field does not exist, create a default value expression + // Note that we use slightly different logic here to create a default value so that we can see different behavior in tests + let default_value = match field.data_type() { + DataType::Int64 => ScalarValue::Int64(Some(1)), + DataType::Utf8 => ScalarValue::Utf8(Some("b".to_string())), + _ => unimplemented!( + "Unsupported data type: {:?}", + field.data_type() + ), + }; + return Ok(Transformed::yes(Arc::new( + expressions::Literal::new(default_value), + ))); + } + } + + Ok(Transformed::no(expr)) + }) + .data()?; + self.inner.rewrite(expr) + } + + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc { + assert!( + partition_values.is_empty(), + "Partition values are not supported in this test" + ); + Arc::new(self.clone()) + } +} + +#[tokio::test] +async fn test_custom_schema_adapter_and_custom_expression_adapter() { + let batch = + record_batch!(("extra", Int64, [1, 2, 3]), ("c1", Int32, [1, 2, 3])).unwrap(); + + let store = Arc::new(InMemory::new()) as Arc; + let store_url = ObjectStoreUrl::parse("memory://").unwrap(); + let path = "test.parquet"; + write_parquet(batch, store.clone(), path).await; + + let table_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int64, false), + Field::new("c2", DataType::Utf8, true), + ])); + + let mut cfg = SessionConfig::new() + // Disable statistics collection for this test otherwise early pruning makes it hard to demonstrate data adaptation + .with_collect_statistics(false) + .with_parquet_pruning(false) + .with_parquet_page_index_pruning(false); + cfg.options_mut().execution.parquet.pushdown_filters = true; + let ctx = SessionContext::new_with_config(cfg); + ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); + assert!( + 
!ctx.state() + .config_mut() + .options_mut() + .execution + .collect_statistics + ); + assert!(!ctx.state().config().collect_statistics()); + + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)) + .with_expr_adapter_factory(Arc::new(DefaultPhysicalExprAdapterFactory)); + + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 IS NULL") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Test using a custom schema adapter and no explicit physical expr adapter + // This should use the custom schema adapter both for projections and predicate pushdown + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_schema_adapter_factory(Arc::new(CustomSchemaAdapterFactory)); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.deregister_table("t").unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'a'") + .await + .unwrap() + .collect() + .await + .unwrap(); + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| a | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // Do the same test but with a custom physical expr adapter + // Now the default schema adapter will be used for projections, but the custom physical expr adapter will be used for predicate pushdown + let listing_table_config = + 
ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.deregister_table("t").unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'b'") + .await + .unwrap() + .collect() + .await + .unwrap(); + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); + + // If we use both then the custom physical expr adapter will be used for predicate pushdown and the custom schema adapter will be used for projections + let listing_table_config = + ListingTableConfig::new(ListingTableUrl::parse("memory:///").unwrap()) + .infer_options(&ctx.state()) + .await + .unwrap() + .with_schema(table_schema.clone()) + .with_schema_adapter_factory(Arc::new(CustomSchemaAdapterFactory)) + .with_expr_adapter_factory(Arc::new(CustomPhysicalExprAdapterFactory)); + let table = ListingTable::try_new(listing_table_config).unwrap(); + ctx.deregister_table("t").unwrap(); + ctx.register_table("t", Arc::new(table)).unwrap(); + let batches = ctx + .sql("SELECT c2, c1 FROM t WHERE c1 = 2 AND c2 = 'b'") + .await + .unwrap() + .collect() + .await + .unwrap(); + let expected = [ + "+----+----+", + "| c2 | c1 |", + "+----+----+", + "| a | 2 |", + "+----+----+", + ]; + assert_batches_eq!(expected, &batches); +} diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 38bc10a967e2b..00ef4a4301e52 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use 
crate::memory_limit::DummyStreamPartition; use crate::physical_optimizer::test_utils::{ aggregate_exec, bounded_window_exec, bounded_window_exec_with_partition, check_integrity, coalesce_batches_exec, coalesce_partitions_exec, create_test_schema, @@ -32,11 +33,11 @@ use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, TransformedResult}; -use datafusion_common::{Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use datafusion_expr_common::operator::Operator; -use datafusion_expr::{JoinType, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition}; +use datafusion_expr::{JoinType, SortExpr, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition}; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; @@ -61,7 +62,14 @@ use datafusion_physical_optimizer::enforce_sorting::sort_pushdown::{SortPushDown use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution; use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; use datafusion_physical_optimizer::PhysicalOptimizerRule; - +use datafusion::prelude::*; +use arrow::array::{Int32Array, RecordBatch}; +use arrow::datatypes::{Field}; +use arrow_schema::Schema; +use datafusion_execution::TaskContext; +use datafusion_catalog::streaming::StreamingTable; + +use futures::StreamExt; use rstest::rstest; /// Create a sorted Csv exec @@ -634,7 +642,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { bounded_window_exec_with_partition("nullable_col", vec![], partition_bys, sort); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { 
name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -644,7 +652,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", // ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -682,7 +690,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( let expected_input = [ "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, 
start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -694,7 +702,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( // ]; let expected_optimized = [ "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -728,7 +736,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( ); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", @@ -740,7 +748,7 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", // ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -788,8 +796,8 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> ); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), 
end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", @@ -802,8 +810,8 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", // ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -853,10 +861,10 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> bounded_window_exec_with_partition("count", vec![], partition_bys, sort3); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", @@ -869,8 +877,8 @@ async fn 
test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", // ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -879,6 +887,7 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> assert_optimized!(expected_input, expected_optimized, physical_plan, true); Ok(()) } + #[tokio::test] async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { let schema = create_test_schema()?; @@ -924,8 +933,7 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { let expected_input = [ "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " 
SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -937,7 +945,7 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { // ]; let expected_optimized = [ "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -984,9 +992,9 @@ async fn 
test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ )); let expected_input = [ - "OutputRequirementExec", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -999,10 +1007,10 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", // ]; let expected_optimized = [ - "OutputRequirementExec", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], 
mode=[Sorted]", + "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; @@ -1042,7 +1050,7 @@ async fn test_window_multi_path_sort() -> Result<()> { // corresponding SortExecs together. Also, the inputs of these `SortExec`s // are not necessarily the same to be able to remove them. 
let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", " UnionExec", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -1082,7 +1090,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { // The `WindowAggExec` can get its required sorting from the leaf nodes directly. // The unnecessary SortExecs should be removed let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", @@ -1091,7 +1099,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", @@ -1601,7 +1609,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { let physical_plan = bounded_window_exec("a", sort_exprs, spm); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -1609,7 +1617,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), 
end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " CoalescePartitionsExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", @@ -1787,10 +1795,10 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { let physical_plan = bounded_window_exec("non_nullable_col", ordering2, filter); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " FilterExec: NOT non_nullable_col@1", " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]" @@ -1799,7 +1807,7 
@@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { let expected_optimized = [ "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", " FilterExec: NOT non_nullable_col@1", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]" @@ -2224,17 +2232,17 @@ async fn test_multiple_sort_window_exec() -> Result<()> { let physical_plan = bounded_window_exec("non_nullable_col", ordering1, window_agg2); let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], 
mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; @@ -2261,7 +2269,7 @@ async fn test_commutativity() -> Result<()> { let expected_input = vec![ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; assert_eq!( @@ -3107,11 +3115,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3123,12 +3131,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("max", false, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: 
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" ], }, @@ -3140,11 +3148,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" ], 
expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3156,12 +3164,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("avg", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3177,11 +3185,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("count", true, true)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3193,11 +3201,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("max", true, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3209,12 +3217,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("min", false, true), ("nullable_col", true, false)], 
initial_plan: vec![ "SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3226,12 +3234,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("avg", true, false)], initial_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3247,11 +3255,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3263,12 +3271,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("max", true, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" ], expected_plan: vec![ "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], 
preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" ], }, @@ -3280,12 +3288,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3297,12 +3305,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("avg", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3318,12 +3326,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![ ("count", true, true)], initial_plan: vec![ "SortExec: expr=[count@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[count@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, // Case 45: @@ -3334,12 +3342,12 @@ async fn 
test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("max", false, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3351,11 +3359,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS 
LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3367,11 +3375,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", 
data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3387,11 +3395,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], 
expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3403,12 +3411,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("max", true, false)], initial_plan: vec![ "SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: 
wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3420,11 +3428,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ 
-3436,12 +3444,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("avg", true, false)], initial_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3457,12 +3465,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: 
Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" ], }, @@ -3474,12 +3482,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("max", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: 
true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3491,12 +3499,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("min", true, false)], initial_plan: vec![ "SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, 
non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3508,11 +3516,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3528,11 +3536,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 
group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3544,12 +3552,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("max", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: Following(UInt32(1)), is_causal: false }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3561,12 +3569,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { 
required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3578,12 +3586,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("avg", true, false)], initial_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3599,12 +3607,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 
0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3616,12 +3624,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("max", true, true)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3633,12 +3641,12 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3650,11 +3658,11 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { required_sort_columns: vec![("nullable_col", true, false)], initial_plan: vec![ "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt32(1)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", ], }, @@ -3843,3 +3851,124 @@ fn test_parallelize_sort_preserves_fetch() -> Result<()> { ); Ok(()) } + +#[tokio::test] +async fn test_partial_sort_with_homogeneous_batches() -> Result<()> { + // Create schema for the table + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])); + + // Create homogeneous batches - each batch has the same values for columns a and b + let batch1 = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 1, 1])), + Arc::new(Int32Array::from(vec![1, 1, 1])), + Arc::new(Int32Array::from(vec![3, 2, 1])), + ], + )?; + let batch2 = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![2, 2, 2])), + Arc::new(Int32Array::from(vec![2, 2, 2])), + Arc::new(Int32Array::from(vec![4, 6, 5])), + ], + )?; + let batch3 = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![3, 3, 3])), + Arc::new(Int32Array::from(vec![3, 3, 3])), + Arc::new(Int32Array::from(vec![9, 7, 8])), + ], + )?; + + // Create session with batch size of 3 to match our homogeneous batch pattern + let session_config = SessionConfig::new() + .with_batch_size(3) + .with_target_partitions(1); + let ctx = SessionContext::new_with_config(session_config); + + let sort_order = vec![ + SortExpr::new( + Expr::Column(datafusion_common::Column::new( + Option::::None, + "a", + )), + true, + false, + ), + SortExpr::new( + Expr::Column(datafusion_common::Column::new( + Option::::None, + "b", + )), + true, + false, + ), + ]; + let batches = Arc::new(DummyStreamPartition { + schema: schema.clone(), + batches: vec![batch1, batch2, batch3], + }) as _; + let provider = StreamingTable::try_new(schema.clone(), vec![batches])? 
+ .with_sort_order(sort_order) + .with_infinite_table(true); + ctx.register_table("test_table", Arc::new(provider))?; + + let sql = "SELECT * FROM test_table ORDER BY a ASC, c ASC"; + let df = ctx.sql(sql).await?; + + let physical_plan = df.create_physical_plan().await?; + + // Verify that PartialSortExec is used + let plan_str = displayable(physical_plan.as_ref()).indent(true).to_string(); + assert!( + plan_str.contains("PartialSortExec"), + "Expected PartialSortExec in plan:\n{plan_str}", + ); + + let task_ctx = Arc::new(TaskContext::default()); + let mut stream = physical_plan.execute(0, task_ctx.clone())?; + + let mut collected_batches = Vec::new(); + while let Some(batch) = stream.next().await { + let batch = batch?; + if batch.num_rows() > 0 { + collected_batches.push(batch); + } + } + + // Assert we got 3 separate batches (not concatenated into fewer) + assert_eq!( + collected_batches.len(), + 3, + "Expected 3 separate batches, got {}", + collected_batches.len() + ); + + // Verify each batch has been sorted within itself + let expected_values = [vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]]; + + for (i, batch) in collected_batches.iter().enumerate() { + let c_array = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + let actual = c_array.values().iter().copied().collect::>(); + assert_eq!(actual, expected_values[i], "Batch {i} not sorted correctly",); + } + + assert_eq!( + task_ctx.runtime_env().memory_pool.reserved(), + 0, + "Memory should be released after execution" + ); + + Ok(()) +} diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index f1ef365c92205..68369bc9d9061 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -289,7 +289,7 @@ fn test_no_pushdown_through_aggregates() { Ok: - FilterExec: b@1 = bar - CoalesceBatchesExec: target_batch_size=100 - - 
AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt] + - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt], ordering_mode=PartiallySorted([0]) - CoalesceBatchesExec: target_batch_size=10 - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = foo " diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index e793af8ed4b03..ea12bea1cf89a 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -29,13 +29,13 @@ use datafusion_datasource::{ use datafusion_physical_expr::conjunction; use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::filter_pushdown::FilterPushdownPhase; +use datafusion_physical_plan::filter_pushdown::{FilterPushdownPhase, PushedDown}; use datafusion_physical_plan::{ displayable, filter::FilterExec, filter_pushdown::{ - ChildPushdownResult, FilterDescription, FilterPushdownPropagation, - PredicateSupport, PredicateSupports, + ChildFilterDescription, ChildPushdownResult, FilterDescription, + FilterPushdownPropagation, }, metrics::ExecutionPlanMetricsSet, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, @@ -227,12 +227,14 @@ impl FileSource for TestSource { predicate: Some(conjunction(filters.clone())), ..self.clone() }); - Ok(FilterPushdownPropagation { - filters: PredicateSupports::all_supported(filters), - updated_node: Some(new_node), - }) + Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::Yes; filters.len()], + ) + .with_updated_node(new_node)) } else { - Ok(FilterPushdownPropagation::unsupported(filters)) + Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::No; filters.len()], + )) } } @@ -515,9 
+517,12 @@ impl ExecutionPlan for TestNode { parent_filters: Vec>, _config: &ConfigOptions, ) -> Result { - Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters) - .with_self_filter(Arc::clone(&self.predicate))) + // Since TestNode marks all parent filters as supported and adds its own filter, + // we use from_child to create a description with all parent filters supported + let child = &self.input; + let child_desc = ChildFilterDescription::from_child(&parent_filters, child)? + .with_self_filter(Arc::clone(&self.predicate)); + Ok(FilterDescription::new().with_child(child_desc)) } fn handle_child_pushdown_result( @@ -534,28 +539,31 @@ impl ExecutionPlan for TestNode { let self_pushdown_result = child_pushdown_result.self_filters[0].clone(); // And pushed down 1 filter assert_eq!(self_pushdown_result.len(), 1); - let self_pushdown_result = self_pushdown_result.into_inner(); + let self_pushdown_result: Vec<_> = self_pushdown_result.into_iter().collect(); + + let first_pushdown_result = self_pushdown_result[0].clone(); - match &self_pushdown_result[0] { - PredicateSupport::Unsupported(filter) => { + match &first_pushdown_result.discriminant { + PushedDown::No => { // We have a filter to push down - let new_child = - FilterExec::try_new(Arc::clone(filter), Arc::clone(&self.input))?; + let new_child = FilterExec::try_new( + Arc::clone(&first_pushdown_result.predicate), + Arc::clone(&self.input), + )?; let new_self = TestNode::new(false, Arc::new(new_child), self.predicate.clone()); let mut res = - FilterPushdownPropagation::transparent(child_pushdown_result); + FilterPushdownPropagation::if_all(child_pushdown_result); res.updated_node = Some(Arc::new(new_self) as Arc); Ok(res) } - PredicateSupport::Supported(_) => { - let res = - FilterPushdownPropagation::transparent(child_pushdown_result); + PushedDown::Yes => { + let res = FilterPushdownPropagation::if_all(child_pushdown_result); Ok(res) } } } else { - let res = 
FilterPushdownPropagation::transparent(child_pushdown_result); + let res = FilterPushdownPropagation::if_all(child_pushdown_result); Ok(res) } } diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index 90124e0fcfc70..4b39e37f94e82 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -40,10 +40,12 @@ mod test { }; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; + use datafusion_physical_plan::common::compute_record_batch_statistics; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::CrossJoinExec; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; + use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::union::UnionExec; @@ -728,4 +730,32 @@ mod test { Ok(()) } + + #[tokio::test] + async fn test_statistic_by_partition_of_placeholder_rows() -> Result<()> { + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let plan = Arc::new(PlaceholderRowExec::new(schema).with_partitions(2)) + as Arc; + let schema = plan.schema(); + + let ctx = TaskContext::default(); + let partitions = execute_stream_partitioned(Arc::clone(&plan), Arc::new(ctx))?; + + let mut all_batches = vec![]; + for (i, partition_stream) in partitions.into_iter().enumerate() { + let batches: Vec = partition_stream.try_collect().await?; + let actual = plan.partition_statistics(Some(i))?; + let expected = + compute_record_batch_statistics(&[batches.clone()], &schema, None); + assert_eq!(actual, expected); + all_batches.push(batches); + } 
+ + let actual = plan.partition_statistics(None)?; + let expected = compute_record_batch_statistics(&all_batches, &schema, None); + assert_eq!(actual, expected); + + Ok(()) + } } diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index 6964965a6431a..801cd2d571046 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -128,6 +128,7 @@ fn test_update_matching_exprs() -> Result<()> { )), ], Field::new("f", DataType::Int32, true).into(), + Arc::new(ConfigOptions::default()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 2))), @@ -193,6 +194,7 @@ fn test_update_matching_exprs() -> Result<()> { )), ], Field::new("f", DataType::Int32, true).into(), + Arc::new(ConfigOptions::default()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 3))), @@ -261,6 +263,7 @@ fn test_update_projected_exprs() -> Result<()> { )), ], Field::new("f", DataType::Int32, true).into(), + Arc::new(ConfigOptions::default()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 2))), @@ -326,6 +329,7 @@ fn test_update_projected_exprs() -> Result<()> { )), ], Field::new("f", DataType::Int32, true).into(), + Arc::new(ConfigOptions::default()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d_new", 3))), @@ -729,7 +733,7 @@ fn test_output_req_after_projection() -> Result<()> { actual, @r" ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - OutputRequirementExec + OutputRequirementExec: order_by=[(b@1, asc), (c@2 + a@0, asc)], dist_by=HashPartitioned[[a@0, b@1]]) DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false " ); @@ -745,7 +749,7 @@ fn test_output_req_after_projection() -> Result<()> { assert_snapshot!( actual, @r" - OutputRequirementExec + OutputRequirementExec: order_by=[(b@2, asc), (c@0 + new_a@1, 
asc)], dist_by=HashPartitioned[[new_a@1, b@2]]) ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false " diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index 5d62ea4ccb20e..6233f5d09c56e 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ -420,7 +420,7 @@ async fn test_bounded_window_agg_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] "# @@ -448,7 +448,7 @@ async fn test_bounded_window_agg_no_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "# ); diff --git a/datafusion/core/tests/sql/aggregates.rs 
b/datafusion/core/tests/sql/aggregates/basic.rs similarity index 100% rename from datafusion/core/tests/sql/aggregates.rs rename to datafusion/core/tests/sql/aggregates/basic.rs diff --git a/datafusion/core/tests/sql/aggregates/dict_nulls.rs b/datafusion/core/tests/sql/aggregates/dict_nulls.rs new file mode 100644 index 0000000000000..da4b2c8d25c9d --- /dev/null +++ b/datafusion/core/tests/sql/aggregates/dict_nulls.rs @@ -0,0 +1,454 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use super::*; +use datafusion::common::test_util::batches_to_string; +use insta::assert_snapshot; + +/// Comprehensive test for aggregate functions with null values and dictionary columns +/// Tests COUNT, SUM, MIN, and MEDIAN null handling in single comprehensive test +#[tokio::test] +async fn test_aggregates_null_handling_comprehensive() -> Result<()> { + let test_data_basic = TestData::new(); + let test_data_extended = TestData::new_extended(); + let test_data_min_max = TestData::new_for_min_max(); + let test_data_median = TestData::new_for_median(); + + // Test COUNT null exclusion with basic data + let sql_count = "SELECT dict_null_keys, COUNT(value) as cnt FROM t GROUP BY dict_null_keys ORDER BY dict_null_keys NULLS FIRST"; + let results_count = run_snapshot_test(&test_data_basic, sql_count).await?; + + assert_snapshot!( + batches_to_string(&results_count), + @r###" + +----------------+-----+ + | dict_null_keys | cnt | + +----------------+-----+ + | | 0 | + | group_a | 2 | + | group_b | 1 | + +----------------+-----+ + "### + ); + + // Test SUM null handling with extended data + let sql_sum = "SELECT dict_null_vals, SUM(value) as total FROM t GROUP BY dict_null_vals ORDER BY dict_null_vals NULLS FIRST"; + let results_sum = run_snapshot_test(&test_data_extended, sql_sum).await?; + + assert_snapshot!( + batches_to_string(&results_sum), + @r" + +----------------+-------+ + | dict_null_vals | total | + +----------------+-------+ + | | 4 | + | group_x | 4 | + | group_y | 2 | + | group_z | 5 | + +----------------+-------+ + " + ); + + // Test MIN null handling with min/max data + let sql_min = "SELECT dict_null_keys, MIN(value) as minimum FROM t GROUP BY dict_null_keys ORDER BY dict_null_keys NULLS FIRST"; + let results_min = run_snapshot_test(&test_data_min_max, sql_min).await?; + + assert_snapshot!( + batches_to_string(&results_min), + @r###" + +----------------+---------+ + | dict_null_keys | minimum | + +----------------+---------+ + | | 2 | + | group_a | 3 
| + | group_b | 1 | + | group_c | 7 | + +----------------+---------+ + "### + ); + + // Test MEDIAN null handling with median data + let sql_median = "SELECT dict_null_vals, MEDIAN(value) as median_value FROM t GROUP BY dict_null_vals ORDER BY dict_null_vals NULLS FIRST"; + let results_median = run_snapshot_test(&test_data_median, sql_median).await?; + + assert_snapshot!( + batches_to_string(&results_median), + @r" + +----------------+--------------+ + | dict_null_vals | median_value | + +----------------+--------------+ + | | 3 | + | group_x | 1 | + | group_y | 5 | + | group_z | 7 | + +----------------+--------------+ + "); + + Ok(()) +} + +/// Test FIRST_VAL and LAST_VAL with null values and GROUP BY dict with null keys and null values - may return null if first/last value is null (single and multiple partitions) +#[tokio::test] +async fn test_first_last_val_null_handling() -> Result<()> { + let test_data = TestData::new_for_first_last(); + + // Test FIRST_VALUE and LAST_VALUE with window functions over groups + let sql = "SELECT dict_null_keys, value, FIRST_VALUE(value) OVER (PARTITION BY dict_null_keys ORDER BY value NULLS FIRST) as first_val, LAST_VALUE(value) OVER (PARTITION BY dict_null_keys ORDER BY value NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_val FROM t ORDER BY dict_null_keys NULLS FIRST, value NULLS FIRST"; + + let results_single = run_snapshot_test(&test_data, sql).await?; + + assert_snapshot!(batches_to_string(&results_single), @r" + +----------------+-------+-----------+----------+ + | dict_null_keys | value | first_val | last_val | + +----------------+-------+-----------+----------+ + | | 1 | 1 | 3 | + | | 3 | 1 | 3 | + | group_a | | | | + | group_a | | | | + | group_b | 2 | 2 | 2 | + +----------------+-------+-----------+----------+ + "); + + Ok(()) +} + +/// Test FIRST_VALUE and LAST_VALUE with ORDER BY - comprehensive null handling +#[tokio::test] +async fn test_first_last_value_order_by_null_handling() -> 
Result<()> { + let ctx = SessionContext::new(); + + // Create test data with nulls mixed in + let dict_keys = create_test_dict( + &[Some("group_a"), Some("group_b"), Some("group_c")], + &[Some(0), Some(1), Some(2), Some(0), Some(1)], + ); + + let values = Int32Array::from(vec![None, Some(10), Some(20), Some(5), None]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_group", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(dict_keys), Arc::new(values)], + )?; + + let table = MemTable::try_new(schema, vec![vec![batch]])?; + ctx.register_table("test_data", Arc::new(table))?; + + // Test all combinations of FIRST_VALUE and LAST_VALUE with null handling + let sql = "SELECT + dict_group, + value, + FIRST_VALUE(value IGNORE NULLS) OVER (ORDER BY value NULLS LAST) as first_ignore_nulls, + FIRST_VALUE(value RESPECT NULLS) OVER (ORDER BY value NULLS FIRST) as first_respect_nulls, + LAST_VALUE(value IGNORE NULLS) OVER (ORDER BY value NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_ignore_nulls, + LAST_VALUE(value RESPECT NULLS) OVER (ORDER BY value NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_respect_nulls + FROM test_data + ORDER BY value NULLS LAST"; + + let df = ctx.sql(sql).await?; + let results = df.collect().await?; + + assert_snapshot!( + batches_to_string(&results), + @r###" + +------------+-------+--------------------+---------------------+-------------------+--------------------+ + | dict_group | value | first_ignore_nulls | first_respect_nulls | last_ignore_nulls | last_respect_nulls | + +------------+-------+--------------------+---------------------+-------------------+--------------------+ + | group_a | 5 | 5 | | 20 | | + | group_b | 10 | 5 | | 20 | | + | group_c | 20 | 5 | | 20 | | + | group_a | | 5 | | 20 | | + | group_b | | 5 | | 20 | | + 
+------------+-------+--------------------+---------------------+-------------------+--------------------+ + "### + ); + + Ok(()) +} + +/// Test GROUP BY with dictionary columns containing null keys and values for FIRST_VALUE/LAST_VALUE +#[tokio::test] +async fn test_first_last_value_group_by_dict_nulls() -> Result<()> { + let ctx = SessionContext::new(); + + // Create dictionary with null keys + let dict_null_keys = create_test_dict( + &[Some("group_a"), Some("group_b")], + &[ + Some(0), // group_a + None, // null key + Some(1), // group_b + None, // null key + Some(0), // group_a + ], + ); + + // Create dictionary with null values + let dict_null_vals = create_test_dict( + &[Some("val_x"), None, Some("val_y")], + &[ + Some(0), // val_x + Some(1), // null value + Some(2), // val_y + Some(1), // null value + Some(0), // val_x + ], + ); + + // Create test values + let values = Int32Array::from(vec![Some(10), Some(20), Some(30), Some(40), Some(50)]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(dict_null_keys), + Arc::new(dict_null_vals), + Arc::new(values), + ], + )?; + + let table = MemTable::try_new(schema, vec![vec![batch]])?; + ctx.register_table("test_data", Arc::new(table))?; + + // Test GROUP BY with null keys + let sql = "SELECT + dict_null_keys, + FIRST_VALUE(value) as first_val, + LAST_VALUE(value) as last_val, + COUNT(*) as cnt + FROM test_data + GROUP BY dict_null_keys + ORDER BY dict_null_keys NULLS FIRST"; + + let df = ctx.sql(sql).await?; + let results = df.collect().await?; + + assert_snapshot!( + batches_to_string(&results), + @r###" + +----------------+-----------+----------+-----+ + | dict_null_keys | first_val | last_val | cnt | + +----------------+-----------+----------+-----+ + | | 20 | 40 | 2 | + | 
group_a | 10 | 50 | 2 | + | group_b | 30 | 30 | 1 | + +----------------+-----------+----------+-----+ + "### + ); + + // Test GROUP BY with null values in dictionary + let sql2 = "SELECT + dict_null_vals, + FIRST_VALUE(value) as first_val, + LAST_VALUE(value) as last_val, + COUNT(*) as cnt + FROM test_data + GROUP BY dict_null_vals + ORDER BY dict_null_vals NULLS FIRST"; + + let df2 = ctx.sql(sql2).await?; + let results2 = df2.collect().await?; + + assert_snapshot!( + batches_to_string(&results2), + @r###" + +----------------+-----------+----------+-----+ + | dict_null_vals | first_val | last_val | cnt | + +----------------+-----------+----------+-----+ + | | 20 | 40 | 2 | + | val_x | 10 | 50 | 2 | + | val_y | 30 | 30 | 1 | + +----------------+-----------+----------+-----+ + "### + ); + + Ok(()) +} + +/// Test MAX with dictionary columns containing null keys and values as specified in the SQL query +#[tokio::test] +async fn test_max_with_fuzz_table_dict_nulls() -> Result<()> { + let (ctx_single, ctx_multi) = setup_fuzz_test_contexts().await?; + + // Execute the SQL query with MAX aggregations + let sql = "SELECT + u8_low, + dictionary_utf8_low, + utf8_low, + max(utf8_low) as col1, + max(utf8) as col2 + FROM + fuzz_table + GROUP BY + u8_low, + dictionary_utf8_low, + utf8_low + ORDER BY u8_low, dictionary_utf8_low NULLS FIRST, utf8_low"; + + let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?; + + assert_snapshot!( + batches_to_string(&results), + @r" + +--------+---------------------+----------+-------+---------+ + | u8_low | dictionary_utf8_low | utf8_low | col1 | col2 | + +--------+---------------------+----------+-------+---------+ + | 1 | | str_b | str_b | value_2 | + | 1 | dict_a | str_a | str_a | value_5 | + | 2 | | str_c | str_c | value_7 | + | 2 | | str_d | str_d | value_4 | + | 2 | dict_b | str_c | str_c | value_3 | + | 3 | | str_e | str_e | | + | 3 | dict_c | str_f | str_f | value_6 | + 
+--------+---------------------+----------+-------+---------+ + "); + + Ok(()) +} + +/// Test MIN with fuzz table containing dictionary columns with null keys and values and timestamp data (single and multiple partitions) +#[tokio::test] +async fn test_min_timestamp_with_fuzz_table_dict_nulls() -> Result<()> { + let (ctx_single, ctx_multi) = setup_fuzz_timestamp_test_contexts().await?; + + // Execute the SQL query with MIN aggregation on timestamp + let sql = "SELECT + utf8_low, + u8_low, + dictionary_utf8_low, + min(timestamp_us) as col1 + FROM + fuzz_table + GROUP BY + utf8_low, + u8_low, + dictionary_utf8_low + ORDER BY utf8_low, u8_low, dictionary_utf8_low NULLS FIRST"; + + let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?; + + assert_snapshot!( + batches_to_string(&results), + @r" + +----------+--------+---------------------+-------------------------+ + | utf8_low | u8_low | dictionary_utf8_low | col1 | + +----------+--------+---------------------+-------------------------+ + | alpha | 10 | dict_x | 1970-01-01T00:00:01 | + | beta | 20 | | 1970-01-01T00:00:02 | + | delta | 20 | | 1970-01-01T00:00:03.500 | + | epsilon | 40 | | 1970-01-01T00:00:04 | + | gamma | 30 | dict_y | 1970-01-01T00:00:02.800 | + | zeta | 30 | dict_z | 1970-01-01T00:00:02.500 | + +----------+--------+---------------------+-------------------------+ + " + ); + + Ok(()) +} + +/// Test COUNT and COUNT DISTINCT with fuzz table containing dictionary columns with null keys and values (single and multiple partitions) +#[tokio::test] +async fn test_count_distinct_with_fuzz_table_dict_nulls() -> Result<()> { + let (ctx_single, ctx_multi) = setup_fuzz_count_test_contexts().await?; + + // Execute the SQL query with COUNT and COUNT DISTINCT aggregations + let sql = "SELECT + u8_low, + utf8_low, + dictionary_utf8_low, + count(duration_nanosecond) as col1, + count(DISTINCT large_binary) as col2 + FROM + fuzz_table + GROUP BY + u8_low, + utf8_low, + dictionary_utf8_low + ORDER BY 
u8_low, utf8_low, dictionary_utf8_low NULLS FIRST"; + + let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?; + + assert_snapshot!( + batches_to_string(&results), + @r###" + +--------+----------+---------------------+------+------+ + | u8_low | utf8_low | dictionary_utf8_low | col1 | col2 | + +--------+----------+---------------------+------+------+ + | 5 | text_a | group_alpha | 3 | 1 | + | 10 | text_b | | 1 | 1 | + | 10 | text_d | | 2 | 0 | + | 15 | text_c | group_beta | 1 | 1 | + | 20 | text_e | | 0 | 1 | + | 25 | text_f | group_gamma | 1 | 1 | + +--------+----------+---------------------+------+------+ + "### + ); + + Ok(()) +} + +/// Test MEDIAN and MEDIAN DISTINCT with fuzz table containing various numeric types and dictionary columns with null keys and values (single and multiple partitions) +#[tokio::test] +async fn test_median_distinct_with_fuzz_table_dict_nulls() -> Result<()> { + let (ctx_single, ctx_multi) = setup_fuzz_median_test_contexts().await?; + + // Execute the SQL query with MEDIAN and MEDIAN DISTINCT aggregations + let sql = "SELECT + u8_low, + dictionary_utf8_low, + median(DISTINCT u64) as col1, + median(DISTINCT u16) as col2, + median(u64) as col3, + median(decimal128) as col4, + median(DISTINCT u32) as col5 + FROM + fuzz_table + GROUP BY + u8_low, + dictionary_utf8_low + ORDER BY u8_low, dictionary_utf8_low NULLS FIRST"; + + let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?; + + assert_snapshot!( + batches_to_string(&results), + @r" + +--------+---------------------+------+------+------+--------+--------+ + | u8_low | dictionary_utf8_low | col1 | col2 | col3 | col4 | col5 | + +--------+---------------------+------+------+------+--------+--------+ + | 50 | | | 30 | | 987.65 | 400000 | + | 50 | group_three | 5000 | 50 | 5000 | 555.55 | 500000 | + | 75 | | 4000 | | 4000 | | 450000 | + | 100 | group_one | 1100 | 11 | 1000 | 123.45 | 110000 | + | 100 | group_two | 1500 | 15 | 1500 | 111.11 | 150000 | + 
| 200 | | 2500 | 22 | 2500 | 506.11 | 250000 | + +--------+---------------------+------+------+------+--------+--------+ + " + ); + + Ok(()) +} diff --git a/datafusion/core/tests/sql/aggregates/mod.rs b/datafusion/core/tests/sql/aggregates/mod.rs new file mode 100644 index 0000000000000..321c158628e43 --- /dev/null +++ b/datafusion/core/tests/sql/aggregates/mod.rs @@ -0,0 +1,1026 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Aggregate function tests + +use super::*; +use arrow::{ + array::{ + types::UInt32Type, Decimal128Array, DictionaryArray, DurationNanosecondArray, + Int32Array, LargeBinaryArray, StringArray, TimestampMicrosecondArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, + }, + datatypes::{DataType, Field, Schema, TimeUnit}, + record_batch::RecordBatch, +}; +use datafusion::{ + common::{test_util::batches_to_string, Result}, + execution::{config::SessionConfig, context::SessionContext}, +}; +use datafusion_catalog::MemTable; +use std::{cmp::min, sync::Arc}; +/// Helper function to create the commonly used UInt32 indexed UTF-8 dictionary data type +pub fn string_dict_type() -> DataType { + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)) +} + +/// Helper functions for aggregate tests with dictionary columns and nulls +/// Creates a dictionary array with null values in the dictionary +pub fn create_test_dict( + values: &[Option<&str>], + indices: &[Option], +) -> DictionaryArray { + let dict_values = StringArray::from(values.to_vec()); + let dict_indices = UInt32Array::from(indices.to_vec()); + DictionaryArray::new(dict_indices, Arc::new(dict_values)) +} + +/// Creates test data with both dictionary columns and value column +pub struct TestData { + pub dict_null_keys: DictionaryArray, + pub dict_null_vals: DictionaryArray, + pub values: Int32Array, + pub schema: Arc, +} + +impl TestData { + pub fn new() -> Self { + // Create dictionary with null keys + let dict_null_keys = create_test_dict( + &[Some("group_a"), Some("group_b")], + &[ + Some(0), // group_a + None, // null key + Some(1), // group_b + None, // null key + Some(0), // group_a + ], + ); + + // Create dictionary with null values + let dict_null_vals = create_test_dict( + &[Some("group_x"), None, Some("group_y")], + &[ + Some(0), // group_x + Some(1), // null value + Some(2), // group_y + Some(1), // null value + Some(0), // group_x + ], + ); + + // Create test data with nulls + let 
values = Int32Array::from(vec![Some(1), None, Some(2), None, Some(3)]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + Self { + dict_null_keys, + dict_null_vals, + values, + schema, + } + } + + /// Creates extended test data for more comprehensive testing + pub fn new_extended() -> Self { + // Create dictionary with null values in the dictionary array + let dict_null_vals = create_test_dict( + &[Some("group_a"), None, Some("group_b")], + &[ + Some(0), // group_a + Some(1), // null value + Some(2), // group_b + Some(1), // null value + Some(0), // group_a + Some(1), // null value + Some(2), // group_b + Some(1), // null value + ], + ); + + // Create dictionary with null keys + let dict_null_keys = create_test_dict( + &[Some("group_x"), Some("group_y"), Some("group_z")], + &[ + Some(0), // group_x + None, // null key + Some(1), // group_y + None, // null key + Some(0), // group_x + None, // null key + Some(2), // group_z + None, // null key + ], + ); + + // Create test data with nulls + let values = Int32Array::from(vec![ + Some(1), + None, + Some(2), + None, + Some(3), + Some(4), + Some(5), + None, + ]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + Self { + dict_null_keys, + dict_null_vals, + values, + schema, + } + } + + /// Creates test data for MIN/MAX testing with varied values + pub fn new_for_min_max() -> Self { + let dict_null_keys = create_test_dict( + &[Some("group_a"), Some("group_b"), Some("group_c")], + &[ + Some(0), + Some(1), + Some(0), + Some(2), + None, + None, // group_a, group_b, group_a, group_c, null, null + ], + ); + + let dict_null_vals = create_test_dict( + &[Some("group_x"), None, Some("group_y")], + &[ + 
Some(0), + Some(1), + Some(0), + Some(2), + Some(1), + Some(1), // group_x, null, group_x, group_y, null, null + ], + ); + + let values = + Int32Array::from(vec![Some(5), Some(1), Some(3), Some(7), Some(2), None]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + Self { + dict_null_keys, + dict_null_vals, + values, + schema, + } + } + + /// Creates test data for MEDIAN testing with varied values + pub fn new_for_median() -> Self { + let dict_null_vals = create_test_dict( + &[Some("group_a"), None, Some("group_b")], + &[Some(0), Some(1), Some(2), Some(1), Some(0)], + ); + + let dict_null_keys = create_test_dict( + &[Some("group_x"), Some("group_y"), Some("group_z")], + &[Some(0), None, Some(1), None, Some(2)], + ); + + let values = Int32Array::from(vec![Some(1), None, Some(5), Some(3), Some(7)]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + Self { + dict_null_keys, + dict_null_vals, + values, + schema, + } + } + + /// Creates test data for FIRST_VALUE/LAST_VALUE testing + pub fn new_for_first_last() -> Self { + let dict_null_keys = create_test_dict( + &[Some("group_a"), Some("group_b")], + &[Some(0), None, Some(1), None, Some(0)], + ); + + let dict_null_vals = create_test_dict( + &[Some("group_x"), None, Some("group_y")], + &[Some(0), Some(1), Some(2), Some(1), Some(0)], + ); + + let values = Int32Array::from(vec![None, Some(1), Some(2), Some(3), None]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("dict_null_keys", string_dict_type(), true), + Field::new("dict_null_vals", string_dict_type(), true), + Field::new("value", DataType::Int32, true), + ])); + + Self { + dict_null_keys, + dict_null_vals, + values, + schema, + } 
+ } +} + +/// Sets up test contexts for TestData with both single and multiple partitions +pub async fn setup_test_contexts( + test_data: &TestData, +) -> Result<(SessionContext, SessionContext)> { + // Single partition context + let ctx_single = create_context_with_partitions(test_data, 1).await?; + + // Multiple partition context + let ctx_multi = create_context_with_partitions(test_data, 3).await?; + + Ok((ctx_single, ctx_multi)) +} + +/// Creates a session context with the specified number of partitions and registers test data +pub async fn create_context_with_partitions( + test_data: &TestData, + num_partitions: usize, +) -> Result { + let ctx = SessionContext::new_with_config( + SessionConfig::new().with_target_partitions(num_partitions), + ); + + let batches = split_test_data_into_batches(test_data, num_partitions)?; + let provider = MemTable::try_new(test_data.schema.clone(), batches)?; + ctx.register_table("t", Arc::new(provider))?; + + Ok(ctx) +} + +/// Splits test data into multiple batches for partitioning +pub fn split_test_data_into_batches( + test_data: &TestData, + num_partitions: usize, +) -> Result>> { + debug_assert!(num_partitions > 0, "num_partitions must be greater than 0"); + let total_len = test_data.values.len(); + let chunk_size = total_len.div_ceil(num_partitions); // Ensure we cover all data + + let mut batches = Vec::new(); + let mut start = 0; + + while start < total_len { + let end = min(start + chunk_size, total_len); + let len = end - start; + + if len > 0 { + let batch = RecordBatch::try_new( + test_data.schema.clone(), + vec![ + Arc::new(test_data.dict_null_keys.slice(start, len)), + Arc::new(test_data.dict_null_vals.slice(start, len)), + Arc::new(test_data.values.slice(start, len)), + ], + )?; + batches.push(vec![batch]); + } + start = end; + } + + Ok(batches) +} + +/// Executes a query on both single and multi-partition contexts and verifies consistency +pub async fn test_query_consistency( + ctx_single: &SessionContext, + 
ctx_multi: &SessionContext, + sql: &str, +) -> Result> { + let df_single = ctx_single.sql(sql).await?; + let results_single = df_single.collect().await?; + + let df_multi = ctx_multi.sql(sql).await?; + let results_multi = df_multi.collect().await?; + + // Verify results are consistent between single and multiple partitions + assert_eq!( + batches_to_string(&results_single), + batches_to_string(&results_multi), + "Results should be identical between single and multiple partitions" + ); + + Ok(results_single) +} + +/// Helper function to run snapshot tests with consistent setup, execution, and assertion +/// This reduces the repetitive pattern of "setup data → SQL → assert_snapshot!" +pub async fn run_snapshot_test( + test_data: &TestData, + sql: &str, +) -> Result> { + let (ctx_single, ctx_multi) = setup_test_contexts(test_data).await?; + let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?; + Ok(results) +} + +/// Test data structure for fuzz table with dictionary columns containing nulls +pub struct FuzzTestData { + pub schema: Arc, + pub u8_low: UInt8Array, + pub dictionary_utf8_low: DictionaryArray, + pub utf8_low: StringArray, + pub utf8: StringArray, +} + +impl FuzzTestData { + pub fn new() -> Self { + // Create dictionary columns with null keys and values + let dictionary_utf8_low = create_test_dict( + &[Some("dict_a"), None, Some("dict_b"), Some("dict_c")], + &[ + Some(0), // dict_a + Some(1), // null value + Some(2), // dict_b + None, // null key + Some(0), // dict_a + Some(1), // null value + Some(3), // dict_c + None, // null key + ], + ); + + let u8_low = UInt8Array::from(vec![ + Some(1), + Some(1), + Some(2), + Some(2), + Some(1), + Some(3), + Some(3), + Some(2), + ]); + + let utf8_low = StringArray::from(vec![ + Some("str_a"), + Some("str_b"), + Some("str_c"), + Some("str_d"), + Some("str_a"), + Some("str_e"), + Some("str_f"), + Some("str_c"), + ]); + + let utf8 = StringArray::from(vec![ + Some("value_1"), + Some("value_2"), + 
Some("value_3"), + Some("value_4"), + Some("value_5"), + None, + Some("value_6"), + Some("value_7"), + ]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("u8_low", DataType::UInt8, true), + Field::new("dictionary_utf8_low", string_dict_type(), true), + Field::new("utf8_low", DataType::Utf8, true), + Field::new("utf8", DataType::Utf8, true), + ])); + + Self { + schema, + u8_low, + dictionary_utf8_low, + utf8_low, + utf8, + } + } +} + +/// Sets up test contexts for fuzz table with both single and multiple partitions +pub async fn setup_fuzz_test_contexts() -> Result<(SessionContext, SessionContext)> { + let test_data = FuzzTestData::new(); + + // Single partition context + let ctx_single = create_fuzz_context_with_partitions(&test_data, 1).await?; + + // Multiple partition context + let ctx_multi = create_fuzz_context_with_partitions(&test_data, 3).await?; + + Ok((ctx_single, ctx_multi)) +} + +/// Creates a session context with fuzz table partitioned into specified number of partitions +pub async fn create_fuzz_context_with_partitions( + test_data: &FuzzTestData, + num_partitions: usize, +) -> Result { + let ctx = SessionContext::new_with_config( + SessionConfig::new().with_target_partitions(num_partitions), + ); + + let batches = split_fuzz_data_into_batches(test_data, num_partitions)?; + let provider = MemTable::try_new(test_data.schema.clone(), batches)?; + ctx.register_table("fuzz_table", Arc::new(provider))?; + + Ok(ctx) +} + +/// Splits fuzz test data into multiple batches for partitioning +pub fn split_fuzz_data_into_batches( + test_data: &FuzzTestData, + num_partitions: usize, +) -> Result>> { + debug_assert!(num_partitions > 0, "num_partitions must be greater than 0"); + let total_len = test_data.u8_low.len(); + let chunk_size = total_len.div_ceil(num_partitions); + + let mut batches = Vec::new(); + let mut start = 0; + + while start < total_len { + let end = min(start + chunk_size, total_len); + let len = end - start; + + if len > 0 { + let batch 
= RecordBatch::try_new( + test_data.schema.clone(), + vec![ + Arc::new(test_data.u8_low.slice(start, len)), + Arc::new(test_data.dictionary_utf8_low.slice(start, len)), + Arc::new(test_data.utf8_low.slice(start, len)), + Arc::new(test_data.utf8.slice(start, len)), + ], + )?; + batches.push(vec![batch]); + } + start = end; + } + + Ok(batches) +} + +/// Test data structure for fuzz table with duration, large_binary and dictionary columns containing nulls +pub struct FuzzCountTestData { + pub schema: Arc, + pub u8_low: UInt8Array, + pub utf8_low: StringArray, + pub dictionary_utf8_low: DictionaryArray, + pub duration_nanosecond: DurationNanosecondArray, + pub large_binary: LargeBinaryArray, +} + +impl FuzzCountTestData { + pub fn new() -> Self { + // Create dictionary columns with null keys and values + let dictionary_utf8_low = create_test_dict( + &[ + Some("group_alpha"), + None, + Some("group_beta"), + Some("group_gamma"), + ], + &[ + Some(0), // group_alpha + Some(1), // null value + Some(2), // group_beta + None, // null key + Some(0), // group_alpha + Some(1), // null value + Some(3), // group_gamma + None, // null key + Some(2), // group_beta + Some(0), // group_alpha + ], + ); + + let u8_low = UInt8Array::from(vec![ + Some(5), + Some(10), + Some(15), + Some(10), + Some(5), + Some(20), + Some(25), + Some(10), + Some(15), + Some(5), + ]); + + let utf8_low = StringArray::from(vec![ + Some("text_a"), + Some("text_b"), + Some("text_c"), + Some("text_d"), + Some("text_a"), + Some("text_e"), + Some("text_f"), + Some("text_d"), + Some("text_c"), + Some("text_a"), + ]); + + // Create duration data with some nulls (nanoseconds) + let duration_nanosecond = DurationNanosecondArray::from(vec![ + Some(1000000000), // 1 second + Some(2000000000), // 2 seconds + None, // null duration + Some(3000000000), // 3 seconds + Some(1500000000), // 1.5 seconds + None, // null duration + Some(4000000000), // 4 seconds + Some(2500000000), // 2.5 seconds + Some(3500000000), // 3.5 
seconds + Some(1200000000), // 1.2 seconds + ]); + + // Create large binary data with some nulls and duplicates + let large_binary = LargeBinaryArray::from(vec![ + Some(b"binary_data_1".as_slice()), + Some(b"binary_data_2".as_slice()), + Some(b"binary_data_3".as_slice()), + None, // null binary + Some(b"binary_data_1".as_slice()), // duplicate + Some(b"binary_data_4".as_slice()), + Some(b"binary_data_5".as_slice()), + None, // null binary + Some(b"binary_data_3".as_slice()), // duplicate + Some(b"binary_data_1".as_slice()), // duplicate + ]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("u8_low", DataType::UInt8, true), + Field::new("utf8_low", DataType::Utf8, true), + Field::new("dictionary_utf8_low", string_dict_type(), true), + Field::new( + "duration_nanosecond", + DataType::Duration(TimeUnit::Nanosecond), + true, + ), + Field::new("large_binary", DataType::LargeBinary, true), + ])); + + Self { + schema, + u8_low, + utf8_low, + dictionary_utf8_low, + duration_nanosecond, + large_binary, + } + } +} + +/// Sets up test contexts for fuzz table with duration/binary columns and both single and multiple partitions +pub async fn setup_fuzz_count_test_contexts() -> Result<(SessionContext, SessionContext)> +{ + let test_data = FuzzCountTestData::new(); + + // Single partition context + let ctx_single = create_fuzz_count_context_with_partitions(&test_data, 1).await?; + + // Multiple partition context + let ctx_multi = create_fuzz_count_context_with_partitions(&test_data, 3).await?; + + Ok((ctx_single, ctx_multi)) +} + +/// Creates a session context with fuzz count table partitioned into specified number of partitions +pub async fn create_fuzz_count_context_with_partitions( + test_data: &FuzzCountTestData, + num_partitions: usize, +) -> Result { + let ctx = SessionContext::new_with_config( + SessionConfig::new().with_target_partitions(num_partitions), + ); + + let batches = split_fuzz_count_data_into_batches(test_data, num_partitions)?; + let provider = 
MemTable::try_new(test_data.schema.clone(), batches)?; + ctx.register_table("fuzz_table", Arc::new(provider))?; + + Ok(ctx) +} + +/// Splits fuzz count test data into multiple batches for partitioning +pub fn split_fuzz_count_data_into_batches( + test_data: &FuzzCountTestData, + num_partitions: usize, +) -> Result>> { + debug_assert!(num_partitions > 0, "num_partitions must be greater than 0"); + let total_len = test_data.u8_low.len(); + let chunk_size = total_len.div_ceil(num_partitions); + + let mut batches = Vec::new(); + let mut start = 0; + + while start < total_len { + let end = min(start + chunk_size, total_len); + let len = end - start; + + if len > 0 { + let batch = RecordBatch::try_new( + test_data.schema.clone(), + vec![ + Arc::new(test_data.u8_low.slice(start, len)), + Arc::new(test_data.utf8_low.slice(start, len)), + Arc::new(test_data.dictionary_utf8_low.slice(start, len)), + Arc::new(test_data.duration_nanosecond.slice(start, len)), + Arc::new(test_data.large_binary.slice(start, len)), + ], + )?; + batches.push(vec![batch]); + } + start = end; + } + + Ok(batches) +} + +/// Test data structure for fuzz table with numeric types for median testing and dictionary columns containing nulls +pub struct FuzzMedianTestData { + pub schema: Arc, + pub u8_low: UInt8Array, + pub dictionary_utf8_low: DictionaryArray, + pub u64: UInt64Array, + pub u16: UInt16Array, + pub u32: UInt32Array, + pub decimal128: Decimal128Array, +} + +impl FuzzMedianTestData { + pub fn new() -> Self { + // Create dictionary columns with null keys and values + let dictionary_utf8_low = create_test_dict( + &[ + Some("group_one"), + None, + Some("group_two"), + Some("group_three"), + ], + &[ + Some(0), // group_one + Some(1), // null value + Some(2), // group_two + None, // null key + Some(0), // group_one + Some(1), // null value + Some(3), // group_three + None, // null key + Some(2), // group_two + Some(0), // group_one + Some(1), // null value + Some(3), // group_three + ], + ); + + let 
u8_low = UInt8Array::from(vec![ + Some(100), + Some(200), + Some(100), + Some(200), + Some(100), + Some(50), + Some(50), + Some(200), + Some(100), + Some(100), + Some(75), + Some(50), + ]); + + // Create u64 data with some nulls and duplicates for DISTINCT testing + let u64 = UInt64Array::from(vec![ + Some(1000), + Some(2000), + Some(1500), + Some(3000), + Some(1000), // duplicate + None, // null + Some(5000), + Some(2500), + Some(1500), // duplicate + Some(1200), + Some(4000), + Some(5000), // duplicate + ]); + + // Create u16 data with some nulls and duplicates + let u16 = UInt16Array::from(vec![ + Some(10), + Some(20), + Some(15), + None, // null + Some(10), // duplicate + Some(30), + Some(50), + Some(25), + Some(15), // duplicate + Some(12), + None, // null + Some(50), // duplicate + ]); + + // Create u32 data with some nulls and duplicates + let u32 = UInt32Array::from(vec![ + Some(100000), + Some(200000), + Some(150000), + Some(300000), + Some(100000), // duplicate + Some(400000), + Some(500000), + None, // null + Some(150000), // duplicate + Some(120000), + Some(450000), + None, // null + ]); + + // Create decimal128 data with precision 10, scale 2 + let decimal128 = Decimal128Array::from(vec![ + Some(12345), // 123.45 + Some(67890), // 678.90 + Some(11111), // 111.11 + None, // null + Some(12345), // 123.45 duplicate + Some(98765), // 987.65 + Some(55555), // 555.55 + Some(33333), // 333.33 + Some(11111), // 111.11 duplicate + Some(12500), // 125.00 + None, // null + Some(55555), // 555.55 duplicate + ]) + .with_precision_and_scale(10, 2) + .unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("u8_low", DataType::UInt8, true), + Field::new("dictionary_utf8_low", string_dict_type(), true), + Field::new("u64", DataType::UInt64, true), + Field::new("u16", DataType::UInt16, true), + Field::new("u32", DataType::UInt32, true), + Field::new("decimal128", DataType::Decimal128(10, 2), true), + ])); + + Self { + schema, + u8_low, + dictionary_utf8_low, 
+ u64, + u16, + u32, + decimal128, + } + } +} + +/// Sets up test contexts for fuzz table with numeric types for median testing and both single and multiple partitions +pub async fn setup_fuzz_median_test_contexts() -> Result<(SessionContext, SessionContext)> +{ + let test_data = FuzzMedianTestData::new(); + + // Single partition context + let ctx_single = create_fuzz_median_context_with_partitions(&test_data, 1).await?; + + // Multiple partition context + let ctx_multi = create_fuzz_median_context_with_partitions(&test_data, 3).await?; + + Ok((ctx_single, ctx_multi)) +} + +/// Creates a session context with fuzz median table partitioned into specified number of partitions +pub async fn create_fuzz_median_context_with_partitions( + test_data: &FuzzMedianTestData, + num_partitions: usize, +) -> Result { + let ctx = SessionContext::new_with_config( + SessionConfig::new().with_target_partitions(num_partitions), + ); + + let batches = split_fuzz_median_data_into_batches(test_data, num_partitions)?; + let provider = MemTable::try_new(test_data.schema.clone(), batches)?; + ctx.register_table("fuzz_table", Arc::new(provider))?; + + Ok(ctx) +} + +/// Splits fuzz median test data into multiple batches for partitioning +pub fn split_fuzz_median_data_into_batches( + test_data: &FuzzMedianTestData, + num_partitions: usize, +) -> Result>> { + debug_assert!(num_partitions > 0, "num_partitions must be greater than 0"); + let total_len = test_data.u8_low.len(); + let chunk_size = total_len.div_ceil(num_partitions); + + let mut batches = Vec::new(); + let mut start = 0; + + while start < total_len { + let end = min(start + chunk_size, total_len); + let len = end - start; + + if len > 0 { + let batch = RecordBatch::try_new( + test_data.schema.clone(), + vec![ + Arc::new(test_data.u8_low.slice(start, len)), + Arc::new(test_data.dictionary_utf8_low.slice(start, len)), + Arc::new(test_data.u64.slice(start, len)), + Arc::new(test_data.u16.slice(start, len)), + 
Arc::new(test_data.u32.slice(start, len)), + Arc::new(test_data.decimal128.slice(start, len)), + ], + )?; + batches.push(vec![batch]); + } + start = end; + } + + Ok(batches) +} + +/// Test data structure for fuzz table with timestamp and dictionary columns containing nulls +pub struct FuzzTimestampTestData { + pub schema: Arc, + pub utf8_low: StringArray, + pub u8_low: UInt8Array, + pub dictionary_utf8_low: DictionaryArray, + pub timestamp_us: TimestampMicrosecondArray, +} + +impl FuzzTimestampTestData { + pub fn new() -> Self { + // Create dictionary columns with null keys and values + let dictionary_utf8_low = create_test_dict( + &[Some("dict_x"), None, Some("dict_y"), Some("dict_z")], + &[ + Some(0), // dict_x + Some(1), // null value + Some(2), // dict_y + None, // null key + Some(0), // dict_x + Some(1), // null value + Some(3), // dict_z + None, // null key + Some(2), // dict_y + ], + ); + + let utf8_low = StringArray::from(vec![ + Some("alpha"), + Some("beta"), + Some("gamma"), + Some("delta"), + Some("alpha"), + Some("epsilon"), + Some("zeta"), + Some("delta"), + Some("gamma"), + ]); + + let u8_low = UInt8Array::from(vec![ + Some(10), + Some(20), + Some(30), + Some(20), + Some(10), + Some(40), + Some(30), + Some(20), + Some(30), + ]); + + // Create timestamp data with some nulls + let timestamp_us = TimestampMicrosecondArray::from(vec![ + Some(1000000), // 1970-01-01 00:00:01 + Some(2000000), // 1970-01-01 00:00:02 + Some(3000000), // 1970-01-01 00:00:03 + None, // null timestamp + Some(1500000), // 1970-01-01 00:00:01.5 + Some(4000000), // 1970-01-01 00:00:04 + Some(2500000), // 1970-01-01 00:00:02.5 + Some(3500000), // 1970-01-01 00:00:03.5 + Some(2800000), // 1970-01-01 00:00:02.8 + ]); + + let schema = Arc::new(Schema::new(vec![ + Field::new("utf8_low", DataType::Utf8, true), + Field::new("u8_low", DataType::UInt8, true), + Field::new("dictionary_utf8_low", string_dict_type(), true), + Field::new( + "timestamp_us", + 
DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ), + ])); + + Self { + schema, + utf8_low, + u8_low, + dictionary_utf8_low, + timestamp_us, + } + } +} + +/// Sets up test contexts for fuzz table with timestamps and both single and multiple partitions +pub async fn setup_fuzz_timestamp_test_contexts( +) -> Result<(SessionContext, SessionContext)> { + let test_data = FuzzTimestampTestData::new(); + + // Single partition context + let ctx_single = create_fuzz_timestamp_context_with_partitions(&test_data, 1).await?; + + // Multiple partition context + let ctx_multi = create_fuzz_timestamp_context_with_partitions(&test_data, 3).await?; + + Ok((ctx_single, ctx_multi)) +} + +/// Creates a session context with fuzz timestamp table partitioned into specified number of partitions +pub async fn create_fuzz_timestamp_context_with_partitions( + test_data: &FuzzTimestampTestData, + num_partitions: usize, +) -> Result { + let ctx = SessionContext::new_with_config( + SessionConfig::new().with_target_partitions(num_partitions), + ); + + let batches = split_fuzz_timestamp_data_into_batches(test_data, num_partitions)?; + let provider = MemTable::try_new(test_data.schema.clone(), batches)?; + ctx.register_table("fuzz_table", Arc::new(provider))?; + + Ok(ctx) +} + +/// Splits fuzz timestamp test data into multiple batches for partitioning +pub fn split_fuzz_timestamp_data_into_batches( + test_data: &FuzzTimestampTestData, + num_partitions: usize, +) -> Result>> { + debug_assert!(num_partitions > 0, "num_partitions must be greater than 0"); + let total_len = test_data.utf8_low.len(); + let chunk_size = total_len.div_ceil(num_partitions); + + let mut batches = Vec::new(); + let mut start = 0; + + while start < total_len { + let end = min(start + chunk_size, total_len); + let len = end - start; + + if len > 0 { + let batch = RecordBatch::try_new( + test_data.schema.clone(), + vec![ + Arc::new(test_data.utf8_low.slice(start, len)), + Arc::new(test_data.u8_low.slice(start, 
len)), + Arc::new(test_data.dictionary_utf8_low.slice(start, len)), + Arc::new(test_data.timestamp_us.slice(start, len)), + ], + )?; + batches.push(vec![batch]); + } + start = end; + } + + Ok(batches) +} + +pub mod basic; +pub mod dict_nulls; diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 5e9748d23d8cd..05cc723ef05fb 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -50,7 +50,7 @@ use object_store::{ path::Path, GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, ObjectStore, PutOptions, PutResult, }; -use object_store::{Attributes, MultipartUpload, PutMultipartOpts, PutPayload}; +use object_store::{Attributes, MultipartUpload, PutMultipartOptions, PutPayload}; use url::Url; #[tokio::test] @@ -645,7 +645,7 @@ impl ObjectStore for MirroringObjectStore { async fn put_multipart_opts( &self, _location: &Path, - _opts: PutMultipartOpts, + _opts: PutMultipartOptions, ) -> object_store::Result> { unimplemented!() } diff --git a/datafusion/core/tests/tracing/traceable_object_store.rs b/datafusion/core/tests/tracing/traceable_object_store.rs index dfcafc3a63da1..60ef1cc5d6b6a 100644 --- a/datafusion/core/tests/tracing/traceable_object_store.rs +++ b/datafusion/core/tests/tracing/traceable_object_store.rs @@ -21,7 +21,7 @@ use crate::tracing::asserting_tracer::assert_traceability; use futures::stream::BoxStream; use object_store::{ path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, - ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, + ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, }; use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; @@ -68,7 +68,7 @@ impl ObjectStore for TraceableObjectStore { async fn put_multipart_opts( &self, location: &Path, - opts: PutMultipartOpts, + opts: PutMultipartOptions, ) -> object_store::Result> { assert_traceability().await; 
self.inner.put_multipart_opts(location, opts).await diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index aa5a72c0fb45b..7f1a12e9cd960 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -957,6 +957,33 @@ impl AggregateUDFImpl for MetadataBasedAggregateUdf { curr_sum: 0, })) } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + signature, + metadata, + } = self; + name == &other.name + && signature == &other.signature + && metadata == &other.metadata + } + + fn hash_value(&self) -> u64 { + let Self { + name, + signature, + metadata: _, // unhashable + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + signature.hash(&mut hasher); + hasher.finish() + } } #[derive(Debug)] diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 90e49e504c758..dd8283613ae85 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::collections::HashMap; -use std::hash::{DefaultHasher, Hash, Hasher}; +use std::hash::{Hash, Hasher}; use std::sync::Arc; use arrow::array::{as_string_array, create_array, record_batch, Int8Array, UInt64Array}; @@ -43,9 +43,9 @@ use datafusion_common::{ use datafusion_expr::expr::FieldMetadata; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, - LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, - ScalarUDF, 
ScalarUDFImpl, Signature, Volatility, + lit_with_metadata, udf_equals_hash, Accumulator, ColumnarValue, CreateFunction, + CreateFunctionBody, LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, + ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions_nested::range::range_udf; use parking_lot::Mutex; @@ -181,6 +181,7 @@ async fn scalar_udf() -> Result<()> { Ok(()) } +#[derive(PartialEq, Hash)] struct Simple0ArgsScalarUDF { name: String, signature: Signature, @@ -217,6 +218,8 @@ impl ScalarUDFImpl for Simple0ArgsScalarUDF { fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(100)))) } + + udf_equals_hash!(ScalarUDFImpl); } #[tokio::test] @@ -489,7 +492,7 @@ async fn test_user_defined_functions_with_alias() -> Result<()> { } /// Volatile UDF that should append a different value to each row -#[derive(Debug)] +#[derive(Debug, PartialEq, Hash)] struct AddIndexToStringVolatileScalarUDF { name: String, signature: Signature, @@ -557,6 +560,8 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF { }; Ok(ColumnarValue::Array(Arc::new(StringArray::from(answer)))) } + + udf_equals_hash!(ScalarUDFImpl); } #[tokio::test] @@ -936,7 +941,7 @@ impl FunctionFactory for CustomFunctionFactory { // // it also defines custom [ScalarUDFImpl::simplify()] // to replace ScalarUDF expression with one instance contains. 
-#[derive(Debug)] +#[derive(Debug, PartialEq, Hash)] struct ScalarFunctionWrapper { name: String, expr: Expr, @@ -974,6 +979,8 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { Ok(ExprSimplifyResult::Simplified(replacement)) } + + udf_equals_hash!(ScalarUDFImpl); } impl ScalarFunctionWrapper { @@ -1208,6 +1215,21 @@ struct MyRegexUdf { regex: Regex, } +impl PartialEq for MyRegexUdf { + fn eq(&self, other: &Self) -> bool { + let Self { signature, regex } = self; + signature == &other.signature && regex.as_str() == other.regex.as_str() + } +} + +impl Hash for MyRegexUdf { + fn hash(&self, state: &mut H) { + let Self { signature, regex } = self; + signature.hash(state); + regex.as_str().hash(state); + } +} + impl MyRegexUdf { fn new(pattern: &str) -> Self { Self { @@ -1260,19 +1282,7 @@ impl ScalarUDFImpl for MyRegexUdf { } } - fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { - if let Some(other) = other.as_any().downcast_ref::() { - self.regex.as_str() == other.regex.as_str() - } else { - false - } - } - - fn hash_value(&self) -> u64 { - let hasher = &mut DefaultHasher::new(); - self.regex.as_str().hash(hasher); - hasher.finish() - } + udf_equals_hash!(ScalarUDFImpl); } #[tokio::test] @@ -1370,13 +1380,25 @@ async fn plan_and_collect(ctx: &SessionContext, sql: &str) -> Result, } +impl Hash for MetadataBasedUdf { + fn hash(&self, state: &mut H) { + let Self { + name, + signature, + metadata: _, // unhashable + } = self; + name.hash(state); + signature.hash(state); + } +} + impl MetadataBasedUdf { fn new(metadata: HashMap) -> Self { // The name we return must be unique. 
Otherwise we will not call distinct @@ -1449,9 +1471,7 @@ impl ScalarUDFImpl for MetadataBasedUdf { } } - fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { - self.name == other.name() - } + udf_equals_hash!(ScalarUDFImpl); } #[tokio::test] @@ -1669,10 +1689,6 @@ impl ScalarUDFImpl for ExtensionBasedUdf { } } } - - fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { - self.name == other.name() - } } struct MyUserExtentionType {} diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index bcd2c3945e392..3251847d03616 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -40,6 +40,7 @@ use datafusion_physical_expr::{ PhysicalExpr, }; use std::collections::HashMap; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::{ any::Any, ops::Range, @@ -568,6 +569,33 @@ impl OddCounter { fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Int64, true).into()) } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + test_state, + aliases, + } = self; + signature == &other.signature + && Arc::ptr_eq(test_state, &other.test_state) + && aliases == &other.aliases + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + test_state, + aliases, + } = self; + let mut hasher = DefaultHasher::new(); + signature.hash(&mut hasher); + Arc::as_ptr(test_state).hash(&mut hasher); + aliases.hash(&mut hasher); + hasher.finish() + } } ctx.register_udwf(WindowUDF::from(SimpleWindowUDF::new(test_state))) @@ -815,6 +843,33 @@ impl WindowUDFImpl for MetadataBasedWindowUdf { .with_metadata(self.metadata.clone()) .into()) } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = 
other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + signature, + metadata, + } = self; + name == &other.name + && signature == &other.signature + && metadata == &other.metadata + } + + fn hash_value(&self) -> u64 { + let Self { + name, + signature, + metadata: _, // unhashable + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + signature.hash(&mut hasher); + hasher.finish() + } } #[derive(Debug)] diff --git a/datafusion/datasource-avro/src/file_format.rs b/datafusion/datasource-avro/src/file_format.rs index 47f8d9daca0ad..60c361b42e771 100644 --- a/datafusion/datasource-avro/src/file_format.rs +++ b/datafusion/datasource-avro/src/file_format.rs @@ -110,6 +110,10 @@ impl FileFormat for AvroFormat { } } + fn compression_type(&self) -> Option { + None + } + async fn infer_schema( &self, _state: &dyn Session, diff --git a/datafusion/datasource-csv/src/file_format.rs b/datafusion/datasource-csv/src/file_format.rs index c9cd09bf676b7..4eeb431584ba7 100644 --- a/datafusion/datasource-csv/src/file_format.rs +++ b/datafusion/datasource-csv/src/file_format.rs @@ -151,13 +151,13 @@ impl CsvFormat { let stream = store .get(&object.location) .await - .map_err(DataFusionError::ObjectStore); + .map_err(|e| DataFusionError::ObjectStore(Box::new(e))); let stream = match stream { Ok(stream) => self .read_to_delimited_chunks_from_stream( stream .into_stream() - .map_err(DataFusionError::ObjectStore) + .map_err(|e| DataFusionError::ObjectStore(Box::new(e))) .boxed(), ) .await @@ -181,7 +181,7 @@ impl CsvFormat { let stream = match decoder { Ok(decoded_stream) => { newline_delimited_stream(decoded_stream.map_err(|e| match e { - DataFusionError::ObjectStore(e) => e, + DataFusionError::ObjectStore(e) => *e, err => object_store::Error::Generic { store: "read to delimited chunks failed", source: Box::new(err), @@ -358,6 +358,10 @@ impl FileFormat for CsvFormat { Ok(format!("{}{}", ext, 
file_compression_type.get_ext())) } + fn compression_type(&self) -> Option { + Some(self.options.compression.into()) + } + async fn infer_schema( &self, state: &dyn Session, diff --git a/datafusion/datasource-json/src/file_format.rs b/datafusion/datasource-json/src/file_format.rs index f6b758b5bc51c..51f4bd7e963e0 100644 --- a/datafusion/datasource-json/src/file_format.rs +++ b/datafusion/datasource-json/src/file_format.rs @@ -185,6 +185,10 @@ impl FileFormat for JsonFormat { Ok(format!("{}{}", ext, file_compression_type.get_ext())) } + fn compression_type(&self) -> Option { + Some(self.options.compression.into()) + } + async fn infer_schema( &self, _state: &dyn Session, diff --git a/datafusion/datasource-parquet/Cargo.toml b/datafusion/datasource-parquet/Cargo.toml index 08d258852a204..8a75a445c8ffb 100644 --- a/datafusion/datasource-parquet/Cargo.toml +++ b/datafusion/datasource-parquet/Cargo.toml @@ -48,6 +48,7 @@ datafusion-physical-plan = { workspace = true } datafusion-pruning = { workspace = true } datafusion-session = { workspace = true } futures = { workspace = true } +hex = { workspace = true, optional = true } itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } @@ -65,3 +66,10 @@ workspace = true [lib] name = "datafusion_datasource_parquet" path = "src/mod.rs" + +[features] +parquet_encryption = [ + "parquet/encryption", + "datafusion-common/parquet_encryption", + "dep:hex", +] diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 59663fe5100a2..43b0886193e74 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -39,6 +39,9 @@ use datafusion_datasource::write::demux::DemuxedStreamReceiver; use arrow::compute::sum; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::config::{ConfigField, ConfigFileType, TableParquetOptions}; +use datafusion_common::encryption::{ 
+ map_config_decryption_to_decryption, FileDecryptionProperties, +}; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; use datafusion_common::{ @@ -78,7 +81,7 @@ use parquet::arrow::arrow_writer::{ use parquet::arrow::async_reader::MetadataFetch; use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, AsyncArrowWriter}; use parquet::basic::Type; -use parquet::encryption::decrypt::FileDecryptionProperties; + use parquet::errors::ParquetError; use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData}; use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; @@ -340,6 +343,10 @@ impl FileFormat for ParquetFormat { } } + fn compression_type(&self) -> Option { + None + } + async fn infer_schema( &self, state: &dyn Session, @@ -350,15 +357,11 @@ impl FileFormat for ParquetFormat { Some(time_unit) => Some(parse_coerce_int96_string(time_unit.as_str())?), None => None, }; - let config_file_decryption_properties = &self.options.crypto.file_decryption; let file_decryption_properties: Option = - match config_file_decryption_properties { - Some(cfd) => { - let fd: FileDecryptionProperties = cfd.clone().into(); - Some(fd) - } - None => None, - }; + map_config_decryption_to_decryption( + self.options.crypto.file_decryption.as_ref(), + ); + let mut schemas: Vec<_> = futures::stream::iter(objects) .map(|object| { fetch_schema_with_location( @@ -415,15 +418,10 @@ impl FileFormat for ParquetFormat { table_schema: SchemaRef, object: &ObjectMeta, ) -> Result { - let config_file_decryption_properties = &self.options.crypto.file_decryption; let file_decryption_properties: Option = - match config_file_decryption_properties { - Some(cfd) => { - let fd: FileDecryptionProperties = cfd.clone().into(); - Some(fd) - } - None => None, - }; + map_config_decryption_to_decryption( + self.options.crypto.file_decryption.as_ref(), + ); let stats = fetch_statistics( store.as_ref(), table_schema, 
@@ -959,14 +957,17 @@ pub async fn fetch_parquet_metadata( store: &dyn ObjectStore, meta: &ObjectMeta, size_hint: Option, - decryption_properties: Option<&FileDecryptionProperties>, + #[allow(unused)] decryption_properties: Option<&FileDecryptionProperties>, ) -> Result { let file_size = meta.size; let fetch = ObjectStoreFetch::new(store, meta); - ParquetMetaDataReader::new() - .with_prefetch_hint(size_hint) - .with_decryption_properties(decryption_properties) + let reader = ParquetMetaDataReader::new().with_prefetch_hint(size_hint); + + #[cfg(feature = "parquet_encryption")] + let reader = reader.with_decryption_properties(decryption_properties); + + reader .load_and_finish(fetch, file_size) .await .map_err(DataFusionError::from) @@ -1345,7 +1346,7 @@ impl FileSink for ParquetSink { let file_metadata = writer .close() .await - .map_err(DataFusionError::ParquetError)?; + .map_err(|e| DataFusionError::ParquetError(Box::new(e)))?; Ok((path, file_metadata)) }); } else { @@ -1408,7 +1409,7 @@ impl FileSink for ParquetSink { demux_task .join_unwind() .await - .map_err(DataFusionError::ExecutionJoin)??; + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; Ok(row_count as u64) } @@ -1536,7 +1537,7 @@ fn spawn_rg_join_and_finalize_task( let (writer, _col_reservation) = task .join_unwind() .await - .map_err(DataFusionError::ExecutionJoin)??; + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; let encoded_size = writer.get_estimated_total_bytes(); rg_reservation.grow(encoded_size); finalized_rg.push(writer.close()?); @@ -1673,7 +1674,7 @@ async fn concatenate_parallel_row_groups( let result = task.join_unwind().await; let mut rg_out = parquet_writer.next_row_group()?; let (serialized_columns, mut rg_reservation, _cnt) = - result.map_err(DataFusionError::ExecutionJoin)??; + result.map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; for chunk in serialized_columns { chunk.append_to_row_group(&mut rg_out)?; rg_reservation.free(); @@ -1740,7 +1741,7 
@@ async fn output_single_parquet_file_parallelized( launch_serialization_task .join_unwind() .await - .map_err(DataFusionError::ExecutionJoin)??; + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; Ok(file_metadata) } diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index b39ec3929f978..7c208d1426ac3 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -31,10 +31,12 @@ use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use arrow::datatypes::{FieldRef, SchemaRef, TimeUnit}; use arrow::error::ArrowError; +use datafusion_common::encryption::FileDecryptionProperties; + use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_datasource::PartitionedFile; +use datafusion_physical_expr::schema_rewriter::PhysicalExprAdapterFactory; use datafusion_physical_expr::simplifier::PhysicalExprSimplifier; -use datafusion_physical_expr::PhysicalExprSchemaRewriter; use datafusion_physical_expr_common::physical_expr::{ is_dynamic_physical_expr, PhysicalExpr, }; @@ -42,11 +44,11 @@ use datafusion_physical_plan::metrics::{Count, ExecutionPlanMetricsSet, MetricBu use datafusion_pruning::{build_pruning_predicate, FilePruner, PruningPredicate}; use futures::{StreamExt, TryStreamExt}; +use itertools::Itertools; use log::debug; use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask}; -use parquet::encryption::decrypt::FileDecryptionProperties; use parquet::file::metadata::ParquetMetaDataReader; /// Implements [`FileOpener`] for a parquet file @@ -92,6 +94,8 @@ pub(super) struct ParquetOpener { pub coerce_int96: Option, /// Optional parquet FileDecryptionProperties pub file_decryption_properties: Option>, + /// Rewrite expressions in the context of the file schema + pub(crate) expr_adapter_factory: 
Option>, } impl FileOpener for ParquetOpener { @@ -116,10 +120,11 @@ impl FileOpener for ParquetOpener { let projected_schema = SchemaRef::from(self.logical_file_schema.project(&self.projection)?); + let schema_adapter_factory = Arc::clone(&self.schema_adapter_factory); let schema_adapter = self .schema_adapter_factory .create(projected_schema, Arc::clone(&self.logical_file_schema)); - let predicate = self.predicate.clone(); + let mut predicate = self.predicate.clone(); let logical_file_schema = Arc::clone(&self.logical_file_schema); let partition_fields = self.partition_fields.clone(); let reorder_predicates = self.reorder_filters; @@ -132,6 +137,9 @@ impl FileOpener for ParquetOpener { let predicate_creation_errors = MetricBuilder::new(&self.metrics) .global_counter("num_predicate_creation_errors"); + let expr_adapter_factory = self.expr_adapter_factory.clone(); + let mut predicate_file_schema = Arc::clone(&self.logical_file_schema); + let mut enable_page_index = self.enable_page_index; let file_decryption_properties = self.file_decryption_properties.clone(); @@ -182,6 +190,7 @@ impl FileOpener for ParquetOpener { // pruning predicates. Thus default to not requesting if from the // underlying reader. let mut options = ArrowReaderOptions::new().with_page_index(false); + #[cfg(feature = "parquet_encryption")] if let Some(fd_val) = file_decryption_properties { options = options.with_file_decryption_properties((*fd_val).clone()); } @@ -235,34 +244,34 @@ impl FileOpener for ParquetOpener { // Adapt the predicate to the physical file schema. // This evaluates missing columns and inserts any necessary casts. 
- let predicate = predicate - .map(|p| { - PhysicalExprSchemaRewriter::new( - &physical_file_schema, - &logical_file_schema, - ) - .with_partition_columns( - partition_fields.to_vec(), - file.partition_values, - ) - .rewrite(p) - .map_err(ArrowError::from) + if let Some(expr_adapter_factory) = expr_adapter_factory { + predicate = predicate .map(|p| { + let partition_values = partition_fields + .iter() + .cloned() + .zip(file.partition_values) + .collect_vec(); + let expr = expr_adapter_factory + .create( + Arc::clone(&logical_file_schema), + Arc::clone(&physical_file_schema), + ) + .with_partition_values(partition_values) + .rewrite(p)?; // After rewriting to the file schema, further simplifications may be possible. // For example, if `'a' = col_that_is_missing` becomes `'a' = NULL` that can then be simplified to `FALSE` // and we can avoid doing any more work on the file (bloom filters, loading the page index, etc.). - PhysicalExprSimplifier::new(&physical_file_schema) - .simplify(p) - .map_err(ArrowError::from) + PhysicalExprSimplifier::new(&physical_file_schema).simplify(expr) }) - }) - .transpose()? 
- .transpose()?; + .transpose()?; + predicate_file_schema = Arc::clone(&physical_file_schema); + } // Build predicates for this specific file let (pruning_predicate, page_pruning_predicate) = build_pruning_predicates( predicate.as_ref(), - &physical_file_schema, + &predicate_file_schema, &predicate_creation_errors, ); @@ -299,9 +308,11 @@ impl FileOpener for ParquetOpener { let row_filter = row_filter::build_row_filter( &predicate, &physical_file_schema, + &predicate_file_schema, builder.metadata(), reorder_predicates, &file_metrics, + &schema_adapter_factory, ); match row_filter { @@ -513,21 +524,31 @@ fn should_enable_page_index( mod test { use std::sync::Arc; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow::{ + compute::cast, + datatypes::{DataType, Field, Schema, SchemaRef}, + }; use bytes::{BufMut, BytesMut}; use chrono::Utc; use datafusion_common::{ - record_batch, stats::Precision, ColumnStatistics, ScalarValue, Statistics, + assert_batches_eq, record_batch, stats::Precision, ColumnStatistics, ScalarValue, + Statistics, }; use datafusion_datasource::{ - file_meta::FileMeta, file_stream::FileOpener, - schema_adapter::DefaultSchemaAdapterFactory, PartitionedFile, + file_meta::FileMeta, + file_stream::FileOpener, + schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, + SchemaMapper, + }, + PartitionedFile, }; use datafusion_expr::{col, lit}; use datafusion_physical_expr::{ - expressions::DynamicFilterPhysicalExpr, planner::logical2physical, PhysicalExpr, + expressions::DynamicFilterPhysicalExpr, planner::logical2physical, + schema_rewriter::DefaultPhysicalExprAdapterFactory, PhysicalExpr, }; - use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; + use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use futures::{Stream, StreamExt}; use object_store::{memory::InMemory, path::Path, ObjectMeta, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -555,6 +576,25 @@ mod test { 
(num_batches, num_rows) } + async fn collect_batches( + mut stream: std::pin::Pin< + Box< + dyn Stream< + Item = Result< + arrow::array::RecordBatch, + arrow::error::ArrowError, + >, + > + Send, + >, + >, + ) -> Vec { + let mut batches = vec![]; + while let Some(Ok(batch)) = stream.next().await { + batches.push(batch); + } + batches + } + async fn write_parquet( store: Arc, filename: &str, @@ -631,6 +671,7 @@ mod test { enable_row_group_stats_pruning: true, coerce_int96: None, file_decryption_properties: None, + expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory)), } }; @@ -716,6 +757,7 @@ mod test { enable_row_group_stats_pruning: true, coerce_int96: None, file_decryption_properties: None, + expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory)), } }; @@ -817,6 +859,7 @@ mod test { enable_row_group_stats_pruning: true, coerce_int96: None, file_decryption_properties: None, + expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory)), } }; let make_meta = || FileMeta { @@ -928,6 +971,7 @@ mod test { enable_row_group_stats_pruning: false, // note that this is false! coerce_int96: None, file_decryption_properties: None, + expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory)), } }; @@ -1040,6 +1084,7 @@ mod test { enable_row_group_stats_pruning: true, coerce_int96: None, file_decryption_properties: None, + expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory)), } }; @@ -1081,4 +1126,167 @@ mod test { assert_eq!(num_batches, 0); assert_eq!(num_rows, 0); } + + fn get_value(metrics: &MetricsSet, metric_name: &str) -> usize { + match metrics.sum_by_name(metric_name) { + Some(v) => v.as_usize(), + _ => { + panic!( + "Expected metric not found. 
Looking for '{metric_name}' in\n\n{metrics:#?}" + ); + } + } + } + + #[tokio::test] + async fn test_custom_schema_adapter_no_rewriter() { + // Make a hardcoded schema adapter that adds a new column "b" with default value 0.0 + // and converts the first column "a" from Int32 to UInt64. + #[derive(Debug, Clone)] + struct CustomSchemaMapper; + + impl SchemaMapper for CustomSchemaMapper { + fn map_batch( + &self, + batch: arrow::array::RecordBatch, + ) -> datafusion_common::Result { + let a_column = cast(batch.column(0), &DataType::UInt64)?; + // Add in a new column "b" with default value 0.0 + let b_column = + arrow::array::Float64Array::from(vec![Some(0.0); batch.num_rows()]); + let columns = vec![a_column, Arc::new(b_column)]; + let new_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::UInt64, false), + Field::new("b", DataType::Float64, false), + ])); + Ok(arrow::record_batch::RecordBatch::try_new( + new_schema, columns, + )?) + } + + fn map_column_statistics( + &self, + file_col_statistics: &[ColumnStatistics], + ) -> datafusion_common::Result> { + Ok(vec![ + file_col_statistics[0].clone(), + ColumnStatistics::new_unknown(), + ]) + } + } + + #[derive(Debug, Clone)] + struct CustomSchemaAdapter; + + impl SchemaAdapter for CustomSchemaAdapter { + fn map_schema( + &self, + _file_schema: &Schema, + ) -> datafusion_common::Result<(Arc, Vec)> + { + let mapper = Arc::new(CustomSchemaMapper); + let projection = vec![0]; // We only need to read the first column "a" from the file + Ok((mapper, projection)) + } + + fn map_column_index( + &self, + index: usize, + file_schema: &Schema, + ) -> Option { + if index < file_schema.fields().len() { + Some(index) + } else { + None // The new column "b" is not in the original schema + } + } + } + + #[derive(Debug, Clone)] + struct CustomSchemaAdapterFactory; + + impl SchemaAdapterFactory for CustomSchemaAdapterFactory { + fn create( + &self, + _projected_table_schema: SchemaRef, + _table_schema: SchemaRef, + ) -> Box { + 
Box::new(CustomSchemaAdapter) + } + } + + // Test that if no expression rewriter is provided we use a SchemaAdapter to adapt the data to the expression + let store = Arc::new(InMemory::new()) as Arc; + let batch = record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); + // Write out the batch to a Parquet file + let data_size = + write_parquet(Arc::clone(&store), "test.parquet", batch.clone()).await; + let file = PartitionedFile::new( + "test.parquet".to_string(), + u64::try_from(data_size).unwrap(), + ); + let table_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::UInt64, false), + Field::new("b", DataType::Float64, false), + ])); + + let file_meta = FileMeta { + object_meta: ObjectMeta { + location: Path::from("test.parquet"), + last_modified: Utc::now(), + size: u64::try_from(data_size).unwrap(), + e_tag: None, + version: None, + }, + range: None, + extensions: None, + metadata_size_hint: None, + }; + + let make_opener = |predicate| ParquetOpener { + partition_index: 0, + projection: Arc::new([0, 1]), + batch_size: 1024, + limit: None, + predicate: Some(predicate), + logical_file_schema: Arc::clone(&table_schema), + metadata_size_hint: None, + metrics: ExecutionPlanMetricsSet::new(), + parquet_file_reader_factory: Arc::new(DefaultParquetFileReaderFactory::new( + Arc::clone(&store), + )), + partition_fields: vec![], + pushdown_filters: true, + reorder_filters: false, + enable_page_index: false, + enable_bloom_filter: false, + schema_adapter_factory: Arc::new(CustomSchemaAdapterFactory), + enable_row_group_stats_pruning: false, + coerce_int96: None, + file_decryption_properties: None, + expr_adapter_factory: None, + }; + + let predicate = logical2physical(&col("a").eq(lit(1u64)), &table_schema); + let opener = make_opener(predicate); + let stream = opener + .open(file_meta.clone(), file.clone()) + .unwrap() + .await + .unwrap(); + let batches = collect_batches(stream).await; + + #[rustfmt::skip] + let expected = [ + "+---+-----+", + 
"| a | b |", + "+---+-----+", + "| 1 | 0.0 |", + "+---+-----+", + ]; + assert_batches_eq!(expected, &batches); + let metrics = opener.metrics.clone_inner(); + assert_eq!(get_value(&metrics, "row_groups_pruned_statistics"), 0); + assert_eq!(get_value(&metrics, "pushdown_rows_pruned"), 2); + } } diff --git a/datafusion/datasource-parquet/src/row_filter.rs b/datafusion/datasource-parquet/src/row_filter.rs index 5626f83186e31..70750a75bc612 100644 --- a/datafusion/datasource-parquet/src/row_filter.rs +++ b/datafusion/datasource-parquet/src/row_filter.rs @@ -67,7 +67,6 @@ use arrow::array::BooleanArray; use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow::error::{ArrowError, Result as ArrowResult}; use arrow::record_batch::RecordBatch; -use itertools::Itertools; use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter}; use parquet::arrow::ProjectionMask; use parquet::file::metadata::ParquetMetaData; @@ -75,8 +74,9 @@ use parquet::file::metadata::ParquetMetaData; use datafusion_common::cast::as_boolean_array; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::Result; +use datafusion_datasource::schema_adapter::{SchemaAdapterFactory, SchemaMapper}; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::utils::{collect_columns, reassign_predicate_columns}; +use datafusion_physical_expr::utils::reassign_predicate_columns; use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; use datafusion_physical_plan::metrics; @@ -106,6 +106,8 @@ pub(crate) struct DatafusionArrowPredicate { rows_matched: metrics::Count, /// how long was spent evaluating this predicate time: metrics::Time, + /// used to perform type coercion while filtering rows + schema_mapper: Arc, } impl DatafusionArrowPredicate { @@ -130,6 +132,7 @@ impl DatafusionArrowPredicate { rows_pruned, rows_matched, time, + schema_mapper: candidate.schema_mapper, }) } } @@ -140,6 +143,8 @@ impl ArrowPredicate for 
DatafusionArrowPredicate { } fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult { + let batch = self.schema_mapper.map_batch(batch)?; + // scoped timer updates on drop let mut timer = self.time.timer(); @@ -182,6 +187,9 @@ pub(crate) struct FilterCandidate { /// required to pass thorugh a `SchemaMapper` to the table schema /// upon which we then evaluate the filter expression. projection: Vec, + /// A `SchemaMapper` used to map batches read from the file schema to + /// the filter's projection of the table schema. + schema_mapper: Arc, /// The projected table schema that this filter references filter_schema: SchemaRef, } @@ -222,11 +230,26 @@ struct FilterCandidateBuilder { /// columns in the file schema that are not in the table schema or columns that /// are in the table schema that are not in the file schema. file_schema: SchemaRef, + /// The schema of the table (merged schema) -- columns may be in different + /// order than in the file and have columns that are not in the file schema + table_schema: SchemaRef, + /// A `SchemaAdapterFactory` used to map the file schema to the table schema. + schema_adapter_factory: Arc, } impl FilterCandidateBuilder { - pub fn new(expr: Arc, file_schema: Arc) -> Self { - Self { expr, file_schema } + pub fn new( + expr: Arc, + file_schema: Arc, + table_schema: Arc, + schema_adapter_factory: Arc, + ) -> Self { + Self { + expr, + file_schema, + table_schema, + schema_adapter_factory, + } } /// Attempt to build a `FilterCandidate` from the expression @@ -238,21 +261,20 @@ impl FilterCandidateBuilder { /// * `Err(e)` if an error occurs while building the candidate pub fn build(self, metadata: &ParquetMetaData) -> Result> { let Some(required_indices_into_table_schema) = - pushdown_columns(&self.expr, &self.file_schema)? + pushdown_columns(&self.expr, &self.table_schema)? 
else { return Ok(None); }; let projected_table_schema = Arc::new( - self.file_schema + self.table_schema .project(&required_indices_into_table_schema)?, ); - let projection_into_file_schema = collect_columns(&self.expr) - .iter() - .map(|c| c.index()) - .sorted_unstable() - .collect_vec(); + let (schema_mapper, projection_into_file_schema) = self + .schema_adapter_factory + .create(Arc::clone(&projected_table_schema), self.table_schema) + .map_schema(&self.file_schema)?; let required_bytes = size_of_columns(&projection_into_file_schema, metadata)?; let can_use_index = columns_sorted(&projection_into_file_schema, metadata)?; @@ -262,6 +284,7 @@ impl FilterCandidateBuilder { required_bytes, can_use_index, projection: projection_into_file_schema, + schema_mapper: Arc::clone(&schema_mapper), filter_schema: Arc::clone(&projected_table_schema), })) } @@ -403,9 +426,11 @@ fn columns_sorted(_columns: &[usize], _metadata: &ParquetMetaData) -> Result, physical_file_schema: &SchemaRef, + predicate_file_schema: &SchemaRef, metadata: &ParquetMetaData, reorder_predicates: bool, file_metrics: &ParquetFileMetrics, + schema_adapter_factory: &Arc, ) -> Result> { let rows_pruned = &file_metrics.pushdown_rows_pruned; let rows_matched = &file_metrics.pushdown_rows_matched; @@ -422,6 +447,8 @@ pub fn build_row_filter( FilterCandidateBuilder::new( Arc::clone(expr), Arc::clone(physical_file_schema), + Arc::clone(predicate_file_schema), + Arc::clone(schema_adapter_factory), ) .build(metadata) }) @@ -465,9 +492,13 @@ mod test { use super::*; use datafusion_common::ScalarValue; + use arrow::datatypes::{Field, TimeUnit::Nanosecond}; + use datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory; use datafusion_expr::{col, Expr}; use datafusion_physical_expr::planner::logical2physical; + use datafusion_physical_plan::metrics::{Count, Time}; + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::parquet_to_arrow_schema; use 
parquet::file::reader::{FileReader, SerializedFileReader}; @@ -489,15 +520,111 @@ mod test { let expr = col("int64_list").is_not_null(); let expr = logical2physical(&expr, &table_schema); + let schema_adapter_factory = Arc::new(DefaultSchemaAdapterFactory); let table_schema = Arc::new(table_schema.clone()); - let candidate = FilterCandidateBuilder::new(expr, table_schema.clone()) - .build(metadata) - .expect("building candidate"); + let candidate = FilterCandidateBuilder::new( + expr, + table_schema.clone(), + table_schema, + schema_adapter_factory, + ) + .build(metadata) + .expect("building candidate"); assert!(candidate.is_none()); } + #[test] + fn test_filter_type_coercion() { + let testdata = datafusion_common::test_util::parquet_test_data(); + let file = std::fs::File::open(format!("{testdata}/alltypes_plain.parquet")) + .expect("opening file"); + + let parquet_reader_builder = + ParquetRecordBatchReaderBuilder::try_new(file).expect("creating reader"); + let metadata = parquet_reader_builder.metadata().clone(); + let file_schema = parquet_reader_builder.schema().clone(); + + // This is the schema we would like to coerce to, + // which is different from the physical schema of the file. 
+ let table_schema = Schema::new(vec![Field::new( + "timestamp_col", + DataType::Timestamp(Nanosecond, Some(Arc::from("UTC"))), + false, + )]); + + // Test all should fail + let expr = col("timestamp_col").lt(Expr::Literal( + ScalarValue::TimestampNanosecond(Some(1), Some(Arc::from("UTC"))), + None, + )); + let expr = logical2physical(&expr, &table_schema); + let schema_adapter_factory = Arc::new(DefaultSchemaAdapterFactory); + let table_schema = Arc::new(table_schema.clone()); + let candidate = FilterCandidateBuilder::new( + expr, + file_schema.clone(), + table_schema.clone(), + schema_adapter_factory, + ) + .build(&metadata) + .expect("building candidate") + .expect("candidate expected"); + + let mut row_filter = DatafusionArrowPredicate::try_new( + candidate, + &metadata, + Count::new(), + Count::new(), + Time::new(), + ) + .expect("creating filter predicate"); + + let mut parquet_reader = parquet_reader_builder + .with_projection(row_filter.projection().clone()) + .build() + .expect("building reader"); + + // Parquet file is small, we only need 1 record batch + let first_rb = parquet_reader + .next() + .expect("expected record batch") + .expect("expected error free record batch"); + + let filtered = row_filter.evaluate(first_rb.clone()); + assert!(matches!(filtered, Ok(a) if a == BooleanArray::from(vec![false; 8]))); + + // Test all should pass + let expr = col("timestamp_col").gt(Expr::Literal( + ScalarValue::TimestampNanosecond(Some(0), Some(Arc::from("UTC"))), + None, + )); + let expr = logical2physical(&expr, &table_schema); + let schema_adapter_factory = Arc::new(DefaultSchemaAdapterFactory); + let candidate = FilterCandidateBuilder::new( + expr, + file_schema, + table_schema, + schema_adapter_factory, + ) + .build(&metadata) + .expect("building candidate") + .expect("candidate expected"); + + let mut row_filter = DatafusionArrowPredicate::try_new( + candidate, + &metadata, + Count::new(), + Count::new(), + Time::new(), + ) + .expect("creating filter 
predicate"); + + let filtered = row_filter.evaluate(first_rb); + assert!(matches!(filtered, Ok(a) if a == BooleanArray::from(vec![true; 8]))); + } + #[test] fn nested_data_structures_prevent_pushdown() { let table_schema = Arc::new(get_lists_table_schema()); diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index b7c5b5d37686d..76a1a8c08acb0 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -39,16 +39,21 @@ use datafusion_common::{DataFusionError, Statistics}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_physical_expr::conjunction; +use datafusion_physical_expr::schema_rewriter::DefaultPhysicalExprAdapterFactory; use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_plan::filter_pushdown::FilterPushdownPropagation; -use datafusion_physical_plan::filter_pushdown::PredicateSupports; +use datafusion_physical_plan::filter_pushdown::PushedDown; +use datafusion_physical_plan::filter_pushdown::{ + FilterPushdownPropagation, PushedDownPredicate, +}; use datafusion_physical_plan::metrics::Count; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion_physical_plan::DisplayFormatType; +use datafusion_common::encryption::map_config_decryption_to_decryption; use itertools::Itertools; use object_store::ObjectStore; + /// Execution plan for reading one or more Parquet files. 
/// /// ```text @@ -164,7 +169,7 @@ use object_store::ObjectStore; /// ```no_run /// # use std::sync::Arc; /// # use arrow::datatypes::Schema; -/// # use datafusion_datasource::file_scan_config::FileScanConfig; +/// # use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; /// # use datafusion_datasource::PartitionedFile; /// # use datafusion_datasource::source::DataSourceExec; /// @@ -178,9 +183,9 @@ use object_store::ObjectStore; /// .iter() /// .map(|file_group| { /// // create a new exec by copying the existing exec's source config -/// let new_config = base_config -/// .clone() -/// .with_file_groups(vec![file_group.clone()]); +/// let new_config = FileScanConfigBuilder::from(base_config.clone()) +/// .with_file_groups(vec![file_group.clone()]) +/// .build(); /// /// (DataSourceExec::from_data_source(new_config)) /// }) @@ -463,22 +468,60 @@ impl FileSource for ParquetSource { let projection = base_config .file_column_projection_indices() .unwrap_or_else(|| (0..base_config.file_schema.fields().len()).collect()); - let schema_adapter_factory = self - .schema_adapter_factory - .clone() - .unwrap_or_else(|| Arc::new(DefaultSchemaAdapterFactory)); + + if self.schema_adapter_factory.is_some() { + log::warn!("The SchemaAdapter API will be removed from ParquetSource in a future release. \ + Use PhysicalExprAdapterFactory API instead. \ + See https://github.com/apache/datafusion/issues/16800 for discussion and https://datafusion.apache.org/library-user-guide/upgrading.html#datafusion-49-0-0 for upgrade instructions."); + } + + let (expr_adapter_factory, schema_adapter_factory) = match ( + base_config.expr_adapter_factory.as_ref(), + self.schema_adapter_factory.as_ref(), + ) { + (Some(expr_adapter_factory), Some(schema_adapter_factory)) => { + // Use both the schema adapter factory and the expr adapter factory. + // This results in the SchemaAdapter being used for projections (e.g. 
a column was selected that is a UInt32 in the file and a UInt64 in the table schema) + // but the PhysicalExprAdapterFactory being used for predicate pushdown and stats pruning. + ( + Some(Arc::clone(expr_adapter_factory)), + Arc::clone(schema_adapter_factory), + ) + } + (Some(expr_adapter_factory), None) => { + // If no custom schema adapter factory is provided but an expr adapter factory is provided use the expr adapter factory alongside the default schema adapter factory. + // This means that the PhysicalExprAdapterFactory will be used for predicate pushdown and stats pruning, while the default schema adapter factory will be used for projections. + ( + Some(Arc::clone(expr_adapter_factory)), + Arc::new(DefaultSchemaAdapterFactory) as _, + ) + } + (None, Some(schema_adapter_factory)) => { + // If a custom schema adapter factory is provided but no expr adapter factory is provided use the custom SchemaAdapter for both projections and predicate pushdown. + // This maximizes compatibility with existing code that uses the SchemaAdapter API and did not explicitly opt into the PhysicalExprAdapterFactory API. + (None, Arc::clone(schema_adapter_factory) as _) + } + (None, None) => { + // If no custom schema adapter factory or expr adapter factory is provided, use the default schema adapter factory and the default physical expr adapter factory. + // This means that the default SchemaAdapter will be used for projections (e.g. a column was selected that is a UInt32 in the file and a UInt64 in the table schema) + // and the default PhysicalExprAdapterFactory will be used for predicate pushdown and stats pruning. + // This is the default behavior with no customization and means that most users of DataFusion will be cut over to the new PhysicalExprAdapterFactory API. 
+ ( + Some(Arc::new(DefaultPhysicalExprAdapterFactory) as _), + Arc::new(DefaultSchemaAdapterFactory) as _, + ) + } + }; let parquet_file_reader_factory = self.parquet_file_reader_factory.clone().unwrap_or_else(|| { Arc::new(DefaultParquetFileReaderFactory::new(object_store)) as _ }); - let file_decryption_properties = self - .table_parquet_options() - .crypto - .file_decryption - .as_ref() - .map(|props| Arc::new(props.clone().into())); + let file_decryption_properties = map_config_decryption_to_decryption( + self.table_parquet_options().crypto.file_decryption.as_ref(), + ) + .map(Arc::new); let coerce_int96 = self .table_parquet_options @@ -508,6 +551,7 @@ impl FileSource for ParquetSource { schema_adapter_factory, coerce_int96, file_decryption_properties, + expr_adapter_factory, }) } @@ -621,7 +665,9 @@ impl FileSource for ParquetSource { config: &ConfigOptions, ) -> datafusion_common::Result>> { let Some(file_schema) = self.file_schema.clone() else { - return Ok(FilterPushdownPropagation::unsupported(filters)); + return Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::No; filters.len()], + )); }; // Determine if based on configs we should push filters down. 
// If either the table / scan itself or the config has pushdown enabled, @@ -635,20 +681,38 @@ impl FileSource for ParquetSource { let pushdown_filters = table_pushdown_enabled || config_pushdown_enabled; let mut source = self.clone(); - let filters = PredicateSupports::new_with_supported_check(filters, |filter| { - can_expr_be_pushed_down_with_schemas(filter, &file_schema) - }); - if filters.is_all_unsupported() { + let filters: Vec = filters + .into_iter() + .map(|filter| { + if can_expr_be_pushed_down_with_schemas(&filter, &file_schema) { + PushedDownPredicate::supported(filter) + } else { + PushedDownPredicate::unsupported(filter) + } + }) + .collect(); + if filters + .iter() + .all(|f| matches!(f.discriminant, PushedDown::No)) + { // No filters can be pushed down, so we can just return the remaining filters // and avoid replacing the source in the physical plan. - return Ok(FilterPushdownPropagation::with_filters(filters)); + return Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::No; filters.len()], + )); } - let allowed_filters = filters.collect_supported(); + let allowed_filters = filters + .iter() + .filter_map(|f| match f.discriminant { + PushedDown::Yes => Some(Arc::clone(&f.predicate)), + PushedDown::No => None, + }) + .collect_vec(); let predicate = match source.predicate { - Some(predicate) => conjunction( - std::iter::once(predicate).chain(allowed_filters.iter().cloned()), - ), - None => conjunction(allowed_filters.iter().cloned()), + Some(predicate) => { + conjunction(std::iter::once(predicate).chain(allowed_filters)) + } + None => conjunction(allowed_filters), }; source.predicate = Some(predicate); source = source.with_pushdown_filters(pushdown_filters); @@ -656,12 +720,15 @@ impl FileSource for ParquetSource { // If pushdown_filters is false we tell our parents that they still have to handle the filters, // even if we updated the predicate to include the filters (they will only be used for stats pruning). 
if !pushdown_filters { - return Ok(FilterPushdownPropagation::with_filters( - filters.make_unsupported(), + return Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::No; filters.len()], ) .with_updated_node(source)); } - Ok(FilterPushdownPropagation::with_filters(filters).with_updated_node(source)) + Ok(FilterPushdownPropagation::with_parent_pushdown_result( + filters.iter().map(|f| f.discriminant).collect(), + ) + .with_updated_node(source)) } fn with_schema_adapter_factory( diff --git a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs index 955cd224e6a43..e9288a5f80f68 100644 --- a/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs +++ b/datafusion/datasource-parquet/tests/apply_schema_adapter_tests.rs @@ -104,7 +104,7 @@ mod parquet_adapter_tests { batch.columns().to_vec(), &options, ) - .map_err(|e| DataFusionError::ArrowError(e, None)) + .map_err(|e| DataFusionError::ArrowError(Box::new(e), None)) } fn map_column_statistics( diff --git a/datafusion/datasource/src/file.rs b/datafusion/datasource/src/file.rs index c5f21ebf1a0f3..29fa38a8ee36e 100644 --- a/datafusion/datasource/src/file.rs +++ b/datafusion/datasource/src/file.rs @@ -30,7 +30,7 @@ use arrow::datatypes::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, Result, Statistics}; use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; -use datafusion_physical_plan::filter_pushdown::FilterPushdownPropagation; +use datafusion_physical_plan::filter_pushdown::{FilterPushdownPropagation, PushedDown}; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion_physical_plan::DisplayFormatType; @@ -120,7 +120,9 @@ pub trait FileSource: Send + Sync { filters: Vec>, _config: &ConfigOptions, ) -> Result>> { - Ok(FilterPushdownPropagation::unsupported(filters)) + Ok(FilterPushdownPropagation::with_parent_pushdown_result( + 
vec![PushedDown::No; filters.len()], + )) } /// Set optional schema adapter factory. diff --git a/datafusion/datasource/src/file_format.rs b/datafusion/datasource/src/file_format.rs index b2caf5277a25f..e0239ab36da09 100644 --- a/datafusion/datasource/src/file_format.rs +++ b/datafusion/datasource/src/file_format.rs @@ -61,6 +61,9 @@ pub trait FileFormat: Send + Sync + fmt::Debug { _file_compression_type: &FileCompressionType, ) -> Result; + /// Returns whether this instance uses compression if applicable + fn compression_type(&self) -> Option; + /// Infer the common schema of the provided objects. The objects will usually /// be analysed up to a given number of records or files (as specified in the /// format config) then give the estimated common schema. This might fail if diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index 431b6ab0bcf0d..95cc9e24b6451 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -53,6 +53,7 @@ use datafusion_execution::{ object_store::ObjectStoreUrl, SendableRecordBatchStream, TaskContext, }; use datafusion_physical_expr::expressions::Column; +use datafusion_physical_expr::schema_rewriter::PhysicalExprAdapterFactory; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; @@ -71,7 +72,7 @@ use log::{debug, warn}; /// The base configurations for a [`DataSourceExec`], the a physical plan for /// any given file format. /// -/// Use [`Self::build`] to create a [`DataSourceExec`] from a ``FileScanConfig`. +/// Use [`DataSourceExec::from_data_source`] to create a [`DataSourceExec`] from a `FileScanConfig`.
/// /// # Example /// ``` @@ -188,6 +189,9 @@ pub struct FileScanConfig { /// Batch size while creating new batches /// Defaults to [`datafusion_common::config::ExecutionOptions`] batch_size. pub batch_size: Option, + /// Expression adapter used to adapt filters and projections that are pushed down into the scan + /// from the logical schema to the physical schema of the file. + pub expr_adapter_factory: Option>, } /// A builder for [`FileScanConfig`]'s. @@ -265,6 +269,7 @@ pub struct FileScanConfigBuilder { file_compression_type: Option, new_lines_in_values: Option, batch_size: Option, + expr_adapter_factory: Option>, } impl FileScanConfigBuilder { @@ -293,6 +298,7 @@ impl FileScanConfigBuilder { table_partition_cols: vec![], constraints: None, batch_size: None, + expr_adapter_factory: None, } } @@ -401,6 +407,20 @@ impl FileScanConfigBuilder { self } + /// Register an expression adapter used to adapt filters and projections that are pushed down into the scan + /// from the logical schema to the physical schema of the file. + /// This can include things like: + /// - Column ordering changes + /// - Handling of missing columns + /// - Rewriting expression to use pre-computed values or file format specific optimizations + pub fn with_expr_adapter( + mut self, + expr_adapter: Option>, + ) -> Self { + self.expr_adapter_factory = expr_adapter; + self + } + /// Build the final [`FileScanConfig`] with all the configured settings. /// /// This method takes ownership of the builder and returns the constructed `FileScanConfig`. 
@@ -420,6 +440,7 @@ impl FileScanConfigBuilder { file_compression_type, new_lines_in_values, batch_size, + expr_adapter_factory: expr_adapter, } = self; let constraints = constraints.unwrap_or_default(); @@ -446,6 +467,7 @@ impl FileScanConfigBuilder { file_compression_type, new_lines_in_values, batch_size, + expr_adapter_factory: expr_adapter, } } } @@ -466,6 +488,7 @@ impl From for FileScanConfigBuilder { table_partition_cols: config.table_partition_cols, constraints: Some(config.constraints), batch_size: config.batch_size, + expr_adapter_factory: config.expr_adapter_factory, } } } @@ -646,64 +669,6 @@ impl DataSource for FileScanConfig { } impl FileScanConfig { - /// Create a new [`FileScanConfig`] with default settings for scanning files. - /// - /// See example on [`FileScanConfig`] - /// - /// No file groups are added by default. See [`Self::with_file`], [`Self::with_file_group`] and - /// [`Self::with_file_groups`]. - /// - /// # Parameters: - /// * `object_store_url`: See [`Self::object_store_url`] - /// * `file_schema`: See [`Self::file_schema`] - #[allow(deprecated)] // `new` will be removed same time as `with_source` - pub fn new( - object_store_url: ObjectStoreUrl, - file_schema: SchemaRef, - file_source: Arc, - ) -> Self { - let statistics = Statistics::new_unknown(&file_schema); - let file_source = file_source - .with_statistics(statistics.clone()) - .with_schema(Arc::clone(&file_schema)); - Self { - object_store_url, - file_schema, - file_groups: vec![], - constraints: Constraints::default(), - projection: None, - limit: None, - table_partition_cols: vec![], - output_ordering: vec![], - file_compression_type: FileCompressionType::UNCOMPRESSED, - new_lines_in_values: false, - file_source: Arc::clone(&file_source), - batch_size: None, - } - } - - /// Set the file source - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_source(mut self, file_source: Arc) -> Self { - self.file_source = - 
file_source.with_statistics(Statistics::new_unknown(&self.file_schema)); - self - } - - /// Set the table constraints of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_constraints(mut self, constraints: Constraints) -> Self { - self.constraints = constraints; - self - } - - /// Set the statistics of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_statistics(mut self, statistics: Statistics) -> Self { - self.file_source = self.file_source.with_statistics(statistics); - self - } - fn projection_indices(&self) -> Vec { match &self.projection { Some(proj) => proj.clone(), @@ -764,88 +729,6 @@ impl FileScanConfig { self.constraints.project(&indexes).unwrap_or_default() } - /// Set the projection of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_projection(mut self, projection: Option>) -> Self { - self.projection = projection; - self - } - - /// Set the limit of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_limit(mut self, limit: Option) -> Self { - self.limit = limit; - self - } - - /// Add a file as a single group - /// - /// See [Self::file_groups] for more information. - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - #[allow(deprecated)] - pub fn with_file(self, file: PartitionedFile) -> Self { - self.with_file_group(FileGroup::new(vec![file])) - } - - /// Add the file groups - /// - /// See [Self::file_groups] for more information. 
- #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_file_groups(mut self, mut file_groups: Vec) -> Self { - self.file_groups.append(&mut file_groups); - self - } - - /// Add a new file group - /// - /// See [Self::file_groups] for more information - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_file_group(mut self, file_group: FileGroup) -> Self { - self.file_groups.push(file_group); - self - } - - /// Set the partitioning columns of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_table_partition_cols(mut self, table_partition_cols: Vec) -> Self { - self.table_partition_cols = table_partition_cols - .into_iter() - .map(|f| Arc::new(f) as FieldRef) - .collect(); - self - } - - /// Set the output ordering of the files - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_output_ordering(mut self, output_ordering: Vec) -> Self { - self.output_ordering = output_ordering; - self - } - - /// Set the file compression type - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_file_compression_type( - mut self, - file_compression_type: FileCompressionType, - ) -> Self { - self.file_compression_type = file_compression_type; - self - } - - /// Set the new_lines_in_values property - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_newlines_in_values(mut self, new_lines_in_values: bool) -> Self { - self.new_lines_in_values = new_lines_in_values; - self - } - - /// Set the batch_size property - #[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")] - pub fn with_batch_size(mut self, batch_size: Option) -> Self { - self.batch_size = batch_size; - self - } - /// Specifies whether newlines in (quoted) values are supported. 
/// /// Parsing newlines in quoted values may be affected by execution behaviour such as @@ -1075,12 +958,6 @@ impl FileScanConfig { .collect()) } - /// Returns a new [`DataSourceExec`] to scan the files specified by this config - #[deprecated(since = "47.0.0", note = "use DataSourceExec::new instead")] - pub fn build(self) -> Arc { - DataSourceExec::from_data_source(self) - } - /// Write the data_type based on file_source fn fmt_file_source(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { write!(f, ", file_type={}", self.file_source.file_type())?; @@ -1895,13 +1772,28 @@ mod tests { struct File { name: &'static str, date: &'static str, - statistics: Vec>, + statistics: Vec, Option)>>, } impl File { fn new( name: &'static str, date: &'static str, statistics: Vec>, + ) -> Self { + Self::new_nullable( + name, + date, + statistics + .into_iter() + .map(|opt| opt.map(|(min, max)| (Some(min), Some(max)))) + .collect(), + ) + } + + fn new_nullable( + name: &'static str, + date: &'static str, + statistics: Vec, Option)>>, ) -> Self { Self { name, @@ -1968,21 +1860,35 @@ mod tests { sort: vec![col("value").sort(false, true)], expected_result: Ok(vec![vec!["1", "0"], vec!["2"]]), }, - // reject nullable sort columns TestCase { - name: "no nullable sort columns", + name: "nullable sort columns, nulls last", file_schema: Schema::new(vec![Field::new( "value".to_string(), DataType::Float64, - true, // should fail because nullable + true, )]), files: vec![ - File::new("0", "2023-01-01", vec![Some((0.00, 0.49))]), - File::new("1", "2023-01-01", vec![Some((0.50, 1.00))]), - File::new("2", "2023-01-02", vec![Some((0.00, 1.00))]), + File::new_nullable("0", "2023-01-01", vec![Some((Some(0.00), Some(0.49)))]), + File::new_nullable("1", "2023-01-01", vec![Some((Some(0.50), None))]), + File::new_nullable("2", "2023-01-02", vec![Some((Some(0.00), None))]), ], sort: vec![col("value").sort(true, false)], - expected_result: Err("construct min/max statistics for 
split_groups_by_statistics\ncaused by\nbuild min rows\ncaused by\ncreate sorting columns\ncaused by\nError during planning: cannot sort by nullable column") + expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]) + }, + TestCase { + name: "nullable sort columns, nulls first", + file_schema: Schema::new(vec![Field::new( + "value".to_string(), + DataType::Float64, + true, + )]), + files: vec![ + File::new_nullable("0", "2023-01-01", vec![Some((None, Some(0.49)))]), + File::new_nullable("1", "2023-01-01", vec![Some((Some(0.50), Some(1.00)))]), + File::new_nullable("2", "2023-01-02", vec![Some((None, Some(1.00)))]), + ], + sort: vec![col("value").sort(true, true)], + expected_result: Ok(vec![vec!["0", "1"], vec!["2"]]) }, TestCase { name: "all three non-overlapping", @@ -2142,12 +2048,12 @@ mod tests { .map(|stats| { stats .map(|(min, max)| ColumnStatistics { - min_value: Precision::Exact(ScalarValue::from( - min, - )), - max_value: Precision::Exact(ScalarValue::from( - max, - )), + min_value: Precision::Exact( + ScalarValue::Float64(min), + ), + max_value: Precision::Exact( + ScalarValue::Float64(max), + ), ..Default::default() }) .unwrap_or_default() diff --git a/datafusion/datasource/src/file_sink_config.rs b/datafusion/datasource/src/file_sink_config.rs index 8a86b11a4743d..2968bd1ee0449 100644 --- a/datafusion/datasource/src/file_sink_config.rs +++ b/datafusion/datasource/src/file_sink_config.rs @@ -22,14 +22,12 @@ use crate::sink::DataSink; use crate::write::demux::{start_demuxer_task, DemuxedStreamReceiver}; use crate::ListingTableUrl; -use arrow::array::RecordBatch; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::Result; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; -use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use async_trait::async_trait; use 
object_store::ObjectStore; @@ -79,34 +77,13 @@ pub trait FileSink: DataSink { .runtime_env() .object_store(&config.object_store_url)?; let (demux_task, file_stream_rx) = start_demuxer_task(config, data, context); - let mut num_rows = self - .spawn_writer_tasks_and_join( - context, - demux_task, - file_stream_rx, - Arc::clone(&object_store), - ) - .await?; - if num_rows == 0 { - // If no rows were written, then no files are output either. - // In this case, send an empty record batch through to ensure the output file is generated - let schema = Arc::clone(&config.output_schema); - let empty_batch = RecordBatch::new_empty(Arc::clone(&schema)); - let data = Box::pin(RecordBatchStreamAdapter::new( - schema, - futures::stream::iter(vec![Ok(empty_batch)]), - )); - let (demux_task, file_stream_rx) = start_demuxer_task(config, data, context); - num_rows = self - .spawn_writer_tasks_and_join( - context, - demux_task, - file_stream_rx, - Arc::clone(&object_store), - ) - .await?; - } - Ok(num_rows) + self.spawn_writer_tasks_and_join( + context, + demux_task, + file_stream_rx, + object_store, + ) + .await } } diff --git a/datafusion/datasource/src/file_stream.rs b/datafusion/datasource/src/file_stream.rs index 25546b3263c97..868b980b6476a 100644 --- a/datafusion/datasource/src/file_stream.rs +++ b/datafusion/datasource/src/file_stream.rs @@ -436,7 +436,7 @@ impl StartableTime { /// (not cpu time) so they include time spent waiting on I/O as well /// as other operators. /// -/// [`FileStream`]: +/// [`FileStream`]: pub struct FileStreamMetrics { /// Wall clock time elapsed for file opening. /// @@ -447,13 +447,13 @@ pub struct FileStreamMetrics { /// will open the next file in the background while scanning the /// current file. This metric will only capture time spent opening /// while not also scanning. 
- /// [`FileStream`]: + /// [`FileStream`]: pub time_opening: StartableTime, /// Wall clock time elapsed for file scanning + first record batch of decompression + decoding /// /// Time between when the [`FileStream`] requests data from the /// stream and when the first [`RecordBatch`] is produced. - /// [`FileStream`]: + /// [`FileStream`]: pub time_scanning_until_data: StartableTime, /// Total elapsed wall clock time for scanning + record batch decompression / decoding /// diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 92e25a97c3a4d..3cd4a1a6c1c96 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -102,9 +102,9 @@ pub struct PartitionedFile { /// You may use [`wrap_partition_value_in_dict`] to wrap them if you have used [`wrap_partition_type_in_dict`] to wrap the column type. /// /// - /// [`wrap_partition_type_in_dict`]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/physical_plan/file_scan_config.rs#L55 - /// [`wrap_partition_value_in_dict`]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/physical_plan/file_scan_config.rs#L62 - /// [`table_partition_cols`]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/file_format/options.rs#L190 + /// [`wrap_partition_type_in_dict`]: crate::file_scan_config::wrap_partition_type_in_dict + /// [`wrap_partition_value_in_dict`]: crate::file_scan_config::wrap_partition_value_in_dict + /// [`table_partition_cols`]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/file_format/options.rs#L87 pub partition_values: Vec, /// An optional file range for a more fine-grained parallel execution pub range: Option, diff --git a/datafusion/datasource/src/schema_adapter.rs b/datafusion/datasource/src/schema_adapter.rs index b43041c8d14db..5e743a3f0c233 100644 --- a/datafusion/datasource/src/schema_adapter.rs +++ 
b/datafusion/datasource/src/schema_adapter.rs @@ -57,6 +57,17 @@ pub trait SchemaAdapterFactory: Debug + Send + Sync + 'static { projected_table_schema: SchemaRef, table_schema: SchemaRef, ) -> Box; + + /// Create a [`SchemaAdapter`] using only the projected table schema. + /// + /// This is a convenience method for cases where the table schema and the + /// projected table schema are the same. + fn create_with_projected_schema( + &self, + projected_table_schema: SchemaRef, + ) -> Box { + self.create(Arc::clone(&projected_table_schema), projected_table_schema) + } } /// Creates [`SchemaMapper`]s to map file-level [`RecordBatch`]es to a table diff --git a/datafusion/datasource/src/source.rs b/datafusion/datasource/src/source.rs index 4dda95b0856b1..d05a25a67955e 100644 --- a/datafusion/datasource/src/source.rs +++ b/datafusion/datasource/src/source.rs @@ -25,20 +25,26 @@ use std::sync::Arc; use datafusion_physical_plan::execution_plan::{ Boundedness, EmissionType, SchedulingType, }; +use datafusion_physical_plan::metrics::SplitMetrics; use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::stream::BatchSplitStream; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, }; +use itertools::Itertools; use crate::file_scan_config::FileScanConfig; use datafusion_common::config::ConfigOptions; use datafusion_common::{Constraints, Result, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; +use datafusion_physical_expr::{ + conjunction, EquivalenceProperties, Partitioning, PhysicalExpr, +}; use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_plan::filter::collect_columns_from_predicate; use datafusion_physical_plan::filter_pushdown::{ - ChildPushdownResult, FilterPushdownPhase, 
FilterPushdownPropagation, + ChildPushdownResult, FilterPushdownPhase, FilterPushdownPropagation, PushedDown, }; /// A source of data, typically a list of files or memory @@ -168,7 +174,9 @@ pub trait DataSource: Send + Sync + Debug { filters: Vec>, _config: &ConfigOptions, ) -> Result>> { - Ok(FilterPushdownPropagation::unsupported(filters)) + Ok(FilterPushdownPropagation::with_parent_pushdown_result( + vec![PushedDown::No; filters.len()], + )) } } @@ -261,17 +269,24 @@ impl ExecutionPlan for DataSourceExec { partition: usize, context: Arc, ) -> Result { - self.data_source.open(partition, Arc::clone(&context)) + let stream = self.data_source.open(partition, Arc::clone(&context))?; + let batch_size = context.session_config().batch_size(); + log::debug!( + "Batch splitting enabled for partition {partition}: batch_size={batch_size}" + ); + let metrics = self.data_source.metrics(); + let split_metrics = SplitMetrics::new(&metrics, partition); + Ok(Box::pin(BatchSplitStream::new( + stream, + batch_size, + split_metrics, + ))) } fn metrics(&self) -> Option { Some(self.data_source.metrics().clone_inner()) } - fn statistics(&self) -> Result { - self.data_source.statistics() - } - fn partition_statistics(&self, partition: Option) -> Result { if let Some(partition) = partition { let mut statistics = Statistics::new_unknown(&self.schema()); @@ -315,16 +330,33 @@ impl ExecutionPlan for DataSourceExec { config: &ConfigOptions, ) -> Result>> { // Push any remaining filters into our data source - let res = self.data_source.try_pushdown_filters( - child_pushdown_result.parent_filters.collect_all(), - config, - )?; + let parent_filters = child_pushdown_result + .parent_filters + .into_iter() + .map(|f| f.filter) + .collect_vec(); + let res = self + .data_source + .try_pushdown_filters(parent_filters.clone(), config)?; match res.updated_node { Some(data_source) => { let mut new_node = self.clone(); new_node.data_source = data_source; new_node.cache = 
Self::compute_properties(Arc::clone(&new_node.data_source)); + + // Recompute equivalence info using new filters + let filter = conjunction( + res.filters + .iter() + .zip(parent_filters) + .filter_map(|(s, f)| match s { + PushedDown::Yes => Some(f), + PushedDown::No => None, + }) + .collect_vec(), + ); + new_node = new_node.add_filter_equivalence_info(filter)?; Ok(FilterPushdownPropagation { filters: res.filters, updated_node: Some(Arc::new(new_node)), @@ -372,6 +404,20 @@ impl DataSourceExec { self } + /// Add filters' equivalence info + fn add_filter_equivalence_info( + mut self, + filter: Arc, + ) -> Result { + let (equal_pairs, _) = collect_columns_from_predicate(&filter); + for (lhs, rhs) in equal_pairs { + self.cache + .eq_properties + .add_equal_conditions(Arc::clone(lhs), Arc::clone(rhs))? + } + Ok(self) + } + fn compute_properties(data_source: Arc) -> PlanProperties { PlanProperties::new( data_source.eq_properties(), diff --git a/datafusion/datasource/src/statistics.rs b/datafusion/datasource/src/statistics.rs index db9af0ff76754..5099bfa072204 100644 --- a/datafusion/datasource/src/statistics.rs +++ b/datafusion/datasource/src/statistics.rs @@ -157,12 +157,18 @@ impl MinMaxStatistics { &min_max_schema, RecordBatch::try_new(Arc::clone(&min_max_schema), min_values).map_err( |e| { - DataFusionError::ArrowError(e, Some("\ncreate min batch".to_string())) + DataFusionError::ArrowError( + Box::new(e), + Some("\ncreate min batch".to_string()), + ) }, )?, RecordBatch::try_new(Arc::clone(&min_max_schema), max_values).map_err( |e| { - DataFusionError::ArrowError(e, Some("\ncreate max batch".to_string())) + DataFusionError::ArrowError( + Box::new(e), + Some("\ncreate max batch".to_string()), + ) }, )?, ) @@ -224,14 +230,7 @@ impl MinMaxStatistics { .zip(sort_columns.iter().copied()) .map(|(sort_expr, column)| { let schema = values.schema(); - let idx = schema.index_of(column.name())?; - let field = schema.field(idx); - - // check that sort columns are non-nullable - 
if field.is_nullable() { - return plan_err!("cannot sort by nullable column"); - } Ok(SortColumn { values: Arc::clone(values.column(idx)), @@ -248,7 +247,10 @@ impl MinMaxStatistics { .collect::>(), ) .map_err(|e| { - DataFusionError::ArrowError(e, Some("convert columns".to_string())) + DataFusionError::ArrowError( + Box::new(e), + Some("convert columns".to_string()), + ) }) }); diff --git a/datafusion/datasource/src/url.rs b/datafusion/datasource/src/url.rs index 348791be9828d..1dc12f7d1d060 100644 --- a/datafusion/datasource/src/url.rs +++ b/datafusion/datasource/src/url.rs @@ -268,7 +268,7 @@ impl ListingTableUrl { let glob_match = self.contains(path, ignore_subdirectory); futures::future::ready(extension_match && glob_match) }) - .map_err(DataFusionError::ObjectStore) + .map_err(|e| DataFusionError::ObjectStore(Box::new(e))) .boxed()) } diff --git a/datafusion/datasource/src/write/orchestration.rs b/datafusion/datasource/src/write/orchestration.rs index a09509ac58626..c6d4b25cbccd9 100644 --- a/datafusion/datasource/src/write/orchestration.rs +++ b/datafusion/datasource/src/write/orchestration.rs @@ -285,8 +285,8 @@ pub async fn spawn_writer_tasks_and_join( write_coordinator_task.join_unwind(), demux_task.join_unwind() ); - r1.map_err(DataFusionError::ExecutionJoin)??; - r2.map_err(DataFusionError::ExecutionJoin)??; + r1.map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; + r2.map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; // Return total row count: rx_row_cnt.await.map_err(|_| { diff --git a/datafusion/doc/src/lib.rs b/datafusion/doc/src/lib.rs index f9b916c2b3aba..ca74c3b06d6dc 100644 --- a/datafusion/doc/src/lib.rs +++ b/datafusion/doc/src/lib.rs @@ -39,7 +39,7 @@ /// thus all text should be in English. 
/// /// [SQL function documentation]: https://datafusion.apache.org/user-guide/sql/index.html -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Hash)] pub struct Documentation { /// The section in the documentation where the UDF will be documented pub doc_section: DocSection, @@ -158,7 +158,7 @@ impl Documentation { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Hash)] pub struct DocSection { /// True to include this doc section in the public /// documentation, false otherwise diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index d656c676bd01d..16a8caad823e2 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -754,6 +754,17 @@ impl Interval { } } + /// Decide if this interval is a superset of `other`. If argument `strict` + /// is `true`, only returns `true` if this interval is a strict superset. + /// + /// NOTE: This function only works with intervals of the same data type. + /// Attempting to compare intervals of different data types will lead + /// to an error. + pub fn is_superset(&self, other: &Interval, strict: bool) -> Result { + Ok(!(strict && self.eq(other)) + && (self.contains(other)? == Interval::CERTAINLY_TRUE)) + } + /// Add the given interval (`other`) to this interval. Say we have intervals /// `[a1, b1]` and `[a2, b2]`, then their sum is `[a1 + a2, b1 + b2]`. 
Note /// that this represents all possible values the sum can take if one can @@ -3805,4 +3816,138 @@ mod tests { let upper = 1.5; capture_mode_change_f32((lower, upper), true, true); } + + #[test] + fn test_is_superset() -> Result<()> { + // Test cases: (interval1, interval2, strict, expected) + let test_cases = vec![ + // Equal intervals - non-strict should be true, strict should be false + ( + Interval::make(Some(10_i32), Some(50_i32))?, + Interval::make(Some(10_i32), Some(50_i32))?, + false, + true, + ), + ( + Interval::make(Some(10_i32), Some(50_i32))?, + Interval::make(Some(10_i32), Some(50_i32))?, + true, + false, + ), + // Unbounded intervals + ( + Interval::make::(None, None)?, + Interval::make(Some(10_i32), Some(50_i32))?, + false, + true, + ), + ( + Interval::make::(None, None)?, + Interval::make::(None, None)?, + false, + true, + ), + ( + Interval::make::(None, None)?, + Interval::make::(None, None)?, + true, + false, + ), + // Half-bounded intervals + ( + Interval::make(Some(0_i32), None)?, + Interval::make(Some(10_i32), Some(50_i32))?, + false, + true, + ), + ( + Interval::make(None, Some(100_i32))?, + Interval::make(Some(10_i32), Some(50_i32))?, + false, + true, + ), + // Non-superset cases - partial overlap + ( + Interval::make(Some(0_i32), Some(50_i32))?, + Interval::make(Some(25_i32), Some(75_i32))?, + false, + false, + ), + ( + Interval::make(Some(0_i32), Some(50_i32))?, + Interval::make(Some(25_i32), Some(75_i32))?, + true, + false, + ), + // Non-superset cases - disjoint intervals + ( + Interval::make(Some(0_i32), Some(50_i32))?, + Interval::make(Some(60_i32), Some(100_i32))?, + false, + false, + ), + // Subset relationship (reversed) + ( + Interval::make(Some(20_i32), Some(80_i32))?, + Interval::make(Some(0_i32), Some(100_i32))?, + false, + false, + ), + // Float cases + ( + Interval::make(Some(0.0_f32), Some(100.0_f32))?, + Interval::make(Some(25.5_f32), Some(75.5_f32))?, + false, + true, + ), + ( + Interval::make(Some(0.0_f64), 
Some(100.0_f64))?, + Interval::make(Some(0.0_f64), Some(100.0_f64))?, + true, + false, + ), + // Edge cases with single point intervals + ( + Interval::make(Some(0_i32), Some(100_i32))?, + Interval::make(Some(50_i32), Some(50_i32))?, + false, + true, + ), + ( + Interval::make(Some(50_i32), Some(50_i32))?, + Interval::make(Some(50_i32), Some(50_i32))?, + false, + true, + ), + ( + Interval::make(Some(50_i32), Some(50_i32))?, + Interval::make(Some(50_i32), Some(50_i32))?, + true, + false, + ), + // Boundary touch cases + ( + Interval::make(Some(0_i32), Some(50_i32))?, + Interval::make(Some(0_i32), Some(25_i32))?, + false, + true, + ), + ( + Interval::make(Some(0_i32), Some(50_i32))?, + Interval::make(Some(25_i32), Some(50_i32))?, + false, + true, + ), + ]; + + for (interval1, interval2, strict, expected) in test_cases { + let result = interval1.is_superset(&interval2, strict)?; + assert_eq!( + result, expected, + "Failed for interval1: {interval1}, interval2: {interval2}, strict: {strict}", + ); + } + + Ok(()) + } } diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 955c28c42a3f4..9264a2940dd1b 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -124,6 +124,57 @@ impl<'a> BinaryTypeCoercer<'a> { /// Returns a [`Signature`] for applying `op` to arguments of type `lhs` and `rhs` fn signature(&'a self) -> Result { + if let Some(coerced) = null_coercion(self.lhs, self.rhs) { + use Operator::*; + // Special handling for arithmetic + null coercion: + // For arithmetic operators on non-temporal types, we must handle the result type here using Arrow's numeric kernel. + // This is because Arrow expects concrete numeric types, and this ensures the correct result type (e.g., for NULL + Int32, result is Int32). 
+ // For all other cases (including temporal arithmetic and non-arithmetic operators), + // we can delegate to signature_inner(&coerced, &coerced), which handles the necessary logic for those operators. + // In those cases, signature_inner is designed to work with the coerced type, even if it originated from a NULL. + if matches!(self.op, Plus | Minus | Multiply | Divide | Modulo) + && !coerced.is_temporal() + { + let ret = self.get_result(&coerced, &coerced).map_err(|e| { + plan_datafusion_err!( + "Cannot get result type for arithmetic operation {coerced} {} {coerced}: {e}", + self.op + ) + })?; + + return Ok(Signature { + lhs: coerced.clone(), + rhs: coerced, + ret, + }); + } + return self.signature_inner(&coerced, &coerced); + } + self.signature_inner(self.lhs, self.rhs) + } + + /// Returns the result type for arithmetic operations + fn get_result( + &self, + lhs: &DataType, + rhs: &DataType, + ) -> arrow::error::Result { + use arrow::compute::kernels::numeric::*; + let l = new_empty_array(lhs); + let r = new_empty_array(rhs); + + let result = match self.op { + Operator::Plus => add_wrapping(&l, &r), + Operator::Minus => sub_wrapping(&l, &r), + Operator::Multiply => mul_wrapping(&l, &r), + Operator::Divide => div(&l, &r), + Operator::Modulo => rem(&l, &r), + _ => unreachable!(), + }; + result.map(|x| x.data_type().clone()) + } + + fn signature_inner(&'a self, lhs: &DataType, rhs: &DataType) -> Result { use arrow::datatypes::DataType::*; use Operator::*; let result = match self.op { @@ -135,7 +186,7 @@ impl<'a> BinaryTypeCoercer<'a> { GtEq | IsDistinctFrom | IsNotDistinctFrom => { - comparison_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { + comparison_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common argument type for comparison operation {} {} {}", self.lhs, @@ -144,7 +195,7 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } - And | Or => if matches!((self.lhs, self.rhs), (Boolean | Null, 
Boolean | Null)) { + And | Or => if matches!((lhs, rhs), (Boolean | Null, Boolean | Null)) { // Logical binary boolean operators can only be evaluated for // boolean or null arguments. Ok(Signature::uniform(Boolean)) @@ -154,28 +205,28 @@ impl<'a> BinaryTypeCoercer<'a> { ) } RegexMatch | RegexIMatch | RegexNotMatch | RegexNotIMatch => { - regex_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { + regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common argument type for regex operation {} {} {}", self.lhs, self.op, self.rhs ) }) } LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => { - regex_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { + regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common argument type for regex operation {} {} {}", self.lhs, self.op, self.rhs ) }) } BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight | BitwiseShiftLeft => { - bitwise_coercion(self.lhs, self.rhs).map(Signature::uniform).ok_or_else(|| { + bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common type for bitwise operation {} {} {}", self.lhs, self.op, self.rhs ) }) } StringConcat => { - string_concat_coercion(self.lhs, self.rhs).map(Signature::uniform).ok_or_else(|| { + string_concat_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common string type for string concat operation {} {} {}", self.lhs, self.op, self.rhs ) @@ -183,8 +234,8 @@ impl<'a> BinaryTypeCoercer<'a> { } AtArrow | ArrowAt => { // Array contains or search (similar to LIKE) operation - array_coercion(self.lhs, self.rhs) - .or_else(|| like_coercion(self.lhs, self.rhs)).map(Signature::comparison).ok_or_else(|| { + array_coercion(lhs, rhs) + .or_else(|| like_coercion(lhs, rhs)).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( "Cannot infer 
common argument type for operation {} {} {}", self.lhs, self.op, self.rhs ) @@ -192,40 +243,24 @@ impl<'a> BinaryTypeCoercer<'a> { } AtAt => { // text search has similar signature to LIKE - like_coercion(self.lhs, self.rhs).map(Signature::comparison).ok_or_else(|| { + like_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| { plan_datafusion_err!( "Cannot infer common argument type for AtAt operation {} {} {}", self.lhs, self.op, self.rhs ) }) } Plus | Minus | Multiply | Divide | Modulo => { - let get_result = |lhs, rhs| { - use arrow::compute::kernels::numeric::*; - let l = new_empty_array(lhs); - let r = new_empty_array(rhs); - - let result = match self.op { - Plus => add_wrapping(&l, &r), - Minus => sub_wrapping(&l, &r), - Multiply => mul_wrapping(&l, &r), - Divide => div(&l, &r), - Modulo => rem(&l, &r), - _ => unreachable!(), - }; - result.map(|x| x.data_type().clone()) - }; - - if let Ok(ret) = get_result(self.lhs, self.rhs) { + if let Ok(ret) = self.get_result(lhs, rhs) { // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ - lhs: self.lhs.clone(), - rhs: self.rhs.clone(), + lhs: lhs.clone(), + rhs: rhs.clone(), ret, }) - } else if let Some(coerced) = temporal_coercion_strict_timezone(self.lhs, self.rhs) { + } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) { // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp - let ret = get_result(&coerced, &coerced).map_err(|e| { + let ret = self.get_result(&coerced, &coerced).map_err(|e| { plan_datafusion_err!( "Cannot get result type for temporal operation {coerced} {} {coerced}: {e}", self.op ) @@ -235,9 +270,9 @@ impl<'a> BinaryTypeCoercer<'a> { rhs: coerced, ret, }) - } else if let Some((lhs, rhs)) = math_decimal_coercion(self.lhs, self.rhs) { + } else if let Some((lhs, rhs)) = math_decimal_coercion(lhs, rhs) { // Decimal arithmetic, e.g. 
Decimal(10, 2) + Decimal(10, 0) - let ret = get_result(&lhs, &rhs).map_err(|e| { + let ret = self.get_result(&lhs, &rhs).map_err(|e| { plan_datafusion_err!( "Cannot get result type for decimal operation {} {} {}: {e}", self.lhs, self.op, self.rhs ) @@ -247,7 +282,7 @@ impl<'a> BinaryTypeCoercer<'a> { rhs, ret, }) - } else if let Some(numeric) = mathematics_numerical_coercion(self.lhs, self.rhs) { + } else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) { // Numeric arithmetic, e.g. Int32 + Int32 Ok(Signature::uniform(numeric)) } else { @@ -307,17 +342,25 @@ fn math_decimal_coercion( } // Unlike with comparison we don't coerce to a decimal in the case of floating point // numbers, instead falling back to floating point arithmetic instead - (Decimal128(_, _), Int8 | Int16 | Int32 | Int64) => { - Some((lhs_type.clone(), coerce_numeric_type_to_decimal(rhs_type)?)) - } - (Int8 | Int16 | Int32 | Int64, Decimal128(_, _)) => { - Some((coerce_numeric_type_to_decimal(lhs_type)?, rhs_type.clone())) - } - (Decimal256(_, _), Int8 | Int16 | Int32 | Int64) => Some(( + ( + Decimal128(_, _), + Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, + ) => Some((lhs_type.clone(), coerce_numeric_type_to_decimal(rhs_type)?)), + ( + Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, + Decimal128(_, _), + ) => Some((coerce_numeric_type_to_decimal(lhs_type)?, rhs_type.clone())), + ( + Decimal256(_, _), + Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, + ) => Some(( lhs_type.clone(), coerce_numeric_type_to_decimal256(rhs_type)?, )), - (Int8 | Int16 | Int32 | Int64, Decimal256(_, _)) => Some(( + ( + Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, + Decimal256(_, _), + ) => Some(( coerce_numeric_type_to_decimal256(lhs_type)?, rhs_type.clone(), )), @@ -1501,1085 +1544,4 @@ fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { } #[cfg(test)] -mod tests { - use super::*; - - use 
datafusion_common::assert_contains; - - #[test] - fn test_coercion_error() -> Result<()> { - let coercer = - BinaryTypeCoercer::new(&DataType::Float32, &Operator::Plus, &DataType::Utf8); - let result_type = coercer.get_input_types(); - - let e = result_type.unwrap_err(); - assert_eq!(e.strip_backtrace(), "Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types"); - Ok(()) - } - - #[test] - fn test_decimal_binary_comparison_coercion() -> Result<()> { - let input_decimal = DataType::Decimal128(20, 3); - let input_types = [ - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, - DataType::Float32, - DataType::Float64, - DataType::Decimal128(38, 10), - DataType::Decimal128(20, 8), - DataType::Null, - ]; - let result_types = [ - DataType::Decimal128(20, 3), - DataType::Decimal128(20, 3), - DataType::Decimal128(20, 3), - DataType::Decimal128(23, 3), - DataType::Decimal128(24, 7), - DataType::Decimal128(32, 15), - DataType::Decimal128(38, 10), - DataType::Decimal128(25, 8), - DataType::Decimal128(20, 3), - ]; - let comparison_op_types = [ - Operator::NotEq, - Operator::Eq, - Operator::Gt, - Operator::GtEq, - Operator::Lt, - Operator::LtEq, - ]; - for (i, input_type) in input_types.iter().enumerate() { - let expect_type = &result_types[i]; - for op in comparison_op_types { - let (lhs, rhs) = BinaryTypeCoercer::new(&input_decimal, &op, input_type) - .get_input_types()?; - assert_eq!(expect_type, &lhs); - assert_eq!(expect_type, &rhs); - } - } - // negative test - let result_type = - BinaryTypeCoercer::new(&input_decimal, &Operator::Eq, &DataType::Boolean) - .get_input_types(); - assert!(result_type.is_err()); - Ok(()) - } - - #[test] - fn test_decimal_mathematics_op_type() { - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Int8).unwrap(), - DataType::Decimal128(3, 0) - ); - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Int16).unwrap(), - DataType::Decimal128(5, 0) - ); - assert_eq!( - 
coerce_numeric_type_to_decimal(&DataType::Int32).unwrap(), - DataType::Decimal128(10, 0) - ); - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Int64).unwrap(), - DataType::Decimal128(20, 0) - ); - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Float16).unwrap(), - DataType::Decimal128(6, 3) - ); - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Float32).unwrap(), - DataType::Decimal128(14, 7) - ); - assert_eq!( - coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(), - DataType::Decimal128(30, 15) - ); - } - - #[test] - fn test_dictionary_type_coercion() { - use DataType::*; - - let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32)); - let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, true), - Some(Int32) - ); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, false), - Some(Int32) - ); - - // Since we can coerce values of Int16 to Utf8 can support this - let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, true), - Some(Utf8) - ); - - // Since we can coerce values of Utf8 to Binary can support this - let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary)); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, true), - Some(Binary) - ); - - let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - let rhs_type = Utf8; - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, false), - Some(Utf8) - ); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, true), - Some(lhs_type.clone()) - ); - - let lhs_type = Utf8; - let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - assert_eq!( - dictionary_comparison_coercion(&lhs_type, &rhs_type, false), - Some(Utf8) - ); - assert_eq!( - 
dictionary_comparison_coercion(&lhs_type, &rhs_type, true), - Some(rhs_type.clone()) - ); - } - - /// Test coercion rules for binary operators - /// - /// Applies coercion rules for `$LHS_TYPE $OP $RHS_TYPE` and asserts that - /// the result type is `$RESULT_TYPE` - macro_rules! test_coercion_binary_rule { - ($LHS_TYPE:expr, $RHS_TYPE:expr, $OP:expr, $RESULT_TYPE:expr) => {{ - let (lhs, rhs) = - BinaryTypeCoercer::new(&$LHS_TYPE, &$OP, &$RHS_TYPE).get_input_types()?; - assert_eq!(lhs, $RESULT_TYPE); - assert_eq!(rhs, $RESULT_TYPE); - }}; - } - - /// Test coercion rules for binary operators - /// - /// Applies coercion rules for each RHS_TYPE in $RHS_TYPES such that - /// `$LHS_TYPE $OP RHS_TYPE` and asserts that the result type is `$RESULT_TYPE`. - /// Also tests that the inverse `RHS_TYPE $OP $LHS_TYPE` is true - macro_rules! test_coercion_binary_rule_multiple { - ($LHS_TYPE:expr, $RHS_TYPES:expr, $OP:expr, $RESULT_TYPE:expr) => {{ - for rh_type in $RHS_TYPES { - let (lhs, rhs) = BinaryTypeCoercer::new(&$LHS_TYPE, &$OP, &rh_type) - .get_input_types()?; - assert_eq!(lhs, $RESULT_TYPE); - assert_eq!(rhs, $RESULT_TYPE); - - BinaryTypeCoercer::new(&rh_type, &$OP, &$LHS_TYPE).get_input_types()?; - assert_eq!(lhs, $RESULT_TYPE); - assert_eq!(rhs, $RESULT_TYPE); - } - }}; - } - - /// Test coercion rules for like - /// - /// Applies coercion rules for both - /// * `$LHS_TYPE LIKE $RHS_TYPE` - /// * `$RHS_TYPE LIKE $LHS_TYPE` - /// - /// And asserts the result type is `$RESULT_TYPE` - macro_rules! 
test_like_rule { - ($LHS_TYPE:expr, $RHS_TYPE:expr, $RESULT_TYPE:expr) => {{ - println!("Coercing {} LIKE {}", $LHS_TYPE, $RHS_TYPE); - let result = like_coercion(&$LHS_TYPE, &$RHS_TYPE); - assert_eq!(result, $RESULT_TYPE); - // reverse the order - let result = like_coercion(&$RHS_TYPE, &$LHS_TYPE); - assert_eq!(result, $RESULT_TYPE); - }}; - } - - #[test] - fn test_date_timestamp_arithmetic_error() -> Result<()> { - let (lhs, rhs) = BinaryTypeCoercer::new( - &DataType::Timestamp(TimeUnit::Nanosecond, None), - &Operator::Minus, - &DataType::Timestamp(TimeUnit::Millisecond, None), - ) - .get_input_types()?; - assert_eq!(lhs.to_string(), "Timestamp(Millisecond, None)"); - assert_eq!(rhs.to_string(), "Timestamp(Millisecond, None)"); - - let err = - BinaryTypeCoercer::new(&DataType::Date32, &Operator::Plus, &DataType::Date64) - .get_input_types() - .unwrap_err() - .to_string(); - - assert_contains!( - &err, - "Cannot get result type for temporal operation Date64 + Date64" - ); - - Ok(()) - } - - #[test] - fn test_like_coercion() { - // string coerce to strings - test_like_rule!(DataType::Utf8, DataType::Utf8, Some(DataType::Utf8)); - test_like_rule!( - DataType::LargeUtf8, - DataType::Utf8, - Some(DataType::LargeUtf8) - ); - test_like_rule!( - DataType::Utf8, - DataType::LargeUtf8, - Some(DataType::LargeUtf8) - ); - test_like_rule!( - DataType::LargeUtf8, - DataType::LargeUtf8, - Some(DataType::LargeUtf8) - ); - - // Also coerce binary to strings - test_like_rule!(DataType::Binary, DataType::Utf8, Some(DataType::Utf8)); - test_like_rule!( - DataType::LargeBinary, - DataType::Utf8, - Some(DataType::LargeUtf8) - ); - test_like_rule!( - DataType::Binary, - DataType::LargeUtf8, - Some(DataType::LargeUtf8) - ); - test_like_rule!( - DataType::LargeBinary, - DataType::LargeUtf8, - Some(DataType::LargeUtf8) - ); - } - - #[test] - fn test_type_coercion() -> Result<()> { - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Date32, - Operator::Eq, - DataType::Date32 - ); - 
test_coercion_binary_rule!( - DataType::Utf8, - DataType::Date64, - Operator::Lt, - DataType::Date64 - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Time32(TimeUnit::Second), - Operator::Eq, - DataType::Time32(TimeUnit::Second) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Time32(TimeUnit::Millisecond), - Operator::Eq, - DataType::Time32(TimeUnit::Millisecond) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Time64(TimeUnit::Microsecond), - Operator::Eq, - DataType::Time64(TimeUnit::Microsecond) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Time64(TimeUnit::Nanosecond), - Operator::Eq, - DataType::Time64(TimeUnit::Nanosecond) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Timestamp(TimeUnit::Second, None), - Operator::Lt, - DataType::Timestamp(TimeUnit::Nanosecond, None) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Timestamp(TimeUnit::Millisecond, None), - Operator::Lt, - DataType::Timestamp(TimeUnit::Nanosecond, None) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Timestamp(TimeUnit::Microsecond, None), - Operator::Lt, - DataType::Timestamp(TimeUnit::Nanosecond, None) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Timestamp(TimeUnit::Nanosecond, None), - Operator::Lt, - DataType::Timestamp(TimeUnit::Nanosecond, None) - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8, - Operator::RegexMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8View, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8View, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8, - Operator::RegexNotMatch, - DataType::Utf8 - ); - 
test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8View, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8View, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8, - Operator::RegexNotIMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Utf8View, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Utf8View, - DataType::Utf8View, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8, - Operator::RegexMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8View, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8View, - Operator::RegexMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8, - Operator::RegexIMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8, - Operator::RegexIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - 
DataType::Utf8View, - Operator::RegexIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8View, - Operator::RegexIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8, - Operator::RegexNotMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8View, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8View, - Operator::RegexNotMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8, - Operator::RegexNotIMatch, - DataType::Utf8 - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), - DataType::Utf8View, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), - DataType::Utf8View, - Operator::RegexNotIMatch, - DataType::Utf8View - ); - test_coercion_binary_rule!( - DataType::Int16, - DataType::Int64, - Operator::BitwiseAnd, - DataType::Int64 - ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::UInt64, - Operator::BitwiseAnd, - DataType::UInt64 - ); - test_coercion_binary_rule!( - DataType::Int8, - DataType::UInt32, - Operator::BitwiseAnd, - DataType::Int64 - ); - 
test_coercion_binary_rule!( - DataType::UInt32, - DataType::Int32, - Operator::BitwiseAnd, - DataType::Int64 - ); - test_coercion_binary_rule!( - DataType::UInt16, - DataType::Int16, - Operator::BitwiseAnd, - DataType::Int32 - ); - test_coercion_binary_rule!( - DataType::UInt32, - DataType::UInt32, - Operator::BitwiseAnd, - DataType::UInt32 - ); - test_coercion_binary_rule!( - DataType::UInt16, - DataType::UInt32, - Operator::BitwiseAnd, - DataType::UInt32 - ); - Ok(()) - } - - #[test] - fn test_type_coercion_arithmetic() -> Result<()> { - use DataType::*; - - // (Float64, _) | (_, Float64) => Some(Float64), - test_coercion_binary_rule_multiple!( - Float64, - [ - Float64, Float32, Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, - Int8, UInt8 - ], - Operator::Plus, - Float64 - ); - // (_, Float32) | (Float32, _) => Some(Float32), - test_coercion_binary_rule_multiple!( - Float32, - [ - Float32, Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, Int8, - UInt8 - ], - Operator::Plus, - Float32 - ); - // (_, Float16) | (Float16, _) => Some(Float16), - test_coercion_binary_rule_multiple!( - Float16, - [Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, Int8, UInt8], - Operator::Plus, - Float16 - ); - // (UInt64, Int64 | Int32 | Int16 | Int8) | (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)), - test_coercion_binary_rule_multiple!( - UInt64, - [Int64, Int32, Int16, Int8], - Operator::Divide, - Decimal128(20, 0) - ); - // (UInt64, _) | (_, UInt64) => Some(UInt64), - test_coercion_binary_rule_multiple!( - UInt64, - [UInt64, UInt32, UInt16, UInt8], - Operator::Modulo, - UInt64 - ); - // (Int64, _) | (_, Int64) => Some(Int64), - test_coercion_binary_rule_multiple!( - Int64, - [Int64, Int32, UInt32, Int16, UInt16, Int8, UInt8], - Operator::Modulo, - Int64 - ); - // (UInt32, Int32 | Int16 | Int8) | (Int32 | Int16 | Int8, UInt32) => Some(Int64) - test_coercion_binary_rule_multiple!( - UInt32, - [Int32, Int16, Int8], - Operator::Modulo, - Int64 - ); - 
// (UInt32, _) | (_, UInt32) => Some(UInt32), - test_coercion_binary_rule_multiple!( - UInt32, - [UInt32, UInt16, UInt8], - Operator::Modulo, - UInt32 - ); - // (Int32, _) | (_, Int32) => Some(Int32), - test_coercion_binary_rule_multiple!( - Int32, - [Int32, Int16, Int8], - Operator::Modulo, - Int32 - ); - // (UInt16, Int16 | Int8) | (Int16 | Int8, UInt16) => Some(Int32) - test_coercion_binary_rule_multiple!( - UInt16, - [Int16, Int8], - Operator::Minus, - Int32 - ); - // (UInt16, _) | (_, UInt16) => Some(UInt16), - test_coercion_binary_rule_multiple!( - UInt16, - [UInt16, UInt8, UInt8], - Operator::Plus, - UInt16 - ); - // (Int16, _) | (_, Int16) => Some(Int16), - test_coercion_binary_rule_multiple!(Int16, [Int16, Int8], Operator::Plus, Int16); - // (UInt8, Int8) | (Int8, UInt8) => Some(Int16) - test_coercion_binary_rule!(Int8, UInt8, Operator::Minus, Int16); - test_coercion_binary_rule!(UInt8, Int8, Operator::Multiply, Int16); - // (UInt8, _) | (_, UInt8) => Some(UInt8), - test_coercion_binary_rule!(UInt8, UInt8, Operator::Minus, UInt8); - // (Int8, _) | (_, Int8) => Some(Int8), - test_coercion_binary_rule!(Int8, Int8, Operator::Plus, Int8); - - Ok(()) - } - - fn test_math_decimal_coercion_rule( - lhs_type: DataType, - rhs_type: DataType, - expected_lhs_type: DataType, - expected_rhs_type: DataType, - ) { - // The coerced types for lhs and rhs, if any of them is not decimal - let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap(); - assert_eq!(lhs_type, expected_lhs_type); - assert_eq!(rhs_type, expected_rhs_type); - } - - #[test] - fn test_coercion_arithmetic_decimal() -> Result<()> { - test_math_decimal_coercion_rule( - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 2), - ); - - test_math_decimal_coercion_rule( - DataType::Int32, - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 0), - DataType::Decimal128(10, 2), - ); - - test_math_decimal_coercion_rule( - 
DataType::Int32, - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 0), - DataType::Decimal128(10, 2), - ); - - test_math_decimal_coercion_rule( - DataType::Int32, - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 0), - DataType::Decimal128(10, 2), - ); - - test_math_decimal_coercion_rule( - DataType::Int32, - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 0), - DataType::Decimal128(10, 2), - ); - - test_math_decimal_coercion_rule( - DataType::Int32, - DataType::Decimal128(10, 2), - DataType::Decimal128(10, 0), - DataType::Decimal128(10, 2), - ); - - Ok(()) - } - - #[test] - fn test_type_coercion_compare() -> Result<()> { - // boolean - test_coercion_binary_rule!( - DataType::Boolean, - DataType::Boolean, - Operator::Eq, - DataType::Boolean - ); - // float - test_coercion_binary_rule!( - DataType::Float16, - DataType::Int64, - Operator::Eq, - DataType::Float16 - ); - test_coercion_binary_rule!( - DataType::Float16, - DataType::Float64, - Operator::Eq, - DataType::Float64 - ); - test_coercion_binary_rule!( - DataType::Float32, - DataType::Int64, - Operator::Eq, - DataType::Float32 - ); - test_coercion_binary_rule!( - DataType::Float32, - DataType::Float64, - Operator::GtEq, - DataType::Float64 - ); - // signed integer - test_coercion_binary_rule!( - DataType::Int8, - DataType::Int32, - Operator::LtEq, - DataType::Int32 - ); - test_coercion_binary_rule!( - DataType::Int64, - DataType::Int32, - Operator::LtEq, - DataType::Int64 - ); - // unsigned integer - test_coercion_binary_rule!( - DataType::UInt32, - DataType::UInt8, - Operator::Gt, - DataType::UInt32 - ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::UInt8, - Operator::Eq, - DataType::UInt64 - ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::Int64, - Operator::Eq, - DataType::Decimal128(20, 0) - ); - // numeric/decimal - test_coercion_binary_rule!( - DataType::Int64, - DataType::Decimal128(10, 0), - Operator::Eq, - DataType::Decimal128(20, 0) - ); - 
test_coercion_binary_rule!( - DataType::Int64, - DataType::Decimal128(10, 2), - Operator::Lt, - DataType::Decimal128(22, 2) - ); - test_coercion_binary_rule!( - DataType::Float64, - DataType::Decimal128(10, 3), - Operator::Gt, - DataType::Decimal128(30, 15) - ); - test_coercion_binary_rule!( - DataType::Int64, - DataType::Decimal128(10, 0), - Operator::Eq, - DataType::Decimal128(20, 0) - ); - test_coercion_binary_rule!( - DataType::Decimal128(14, 2), - DataType::Decimal128(10, 3), - Operator::GtEq, - DataType::Decimal128(15, 3) - ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::Decimal128(20, 0), - Operator::Eq, - DataType::Decimal128(20, 0) - ); - - // Binary - test_coercion_binary_rule!( - DataType::Binary, - DataType::Binary, - Operator::Eq, - DataType::Binary - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::Binary, - Operator::Eq, - DataType::Binary - ); - test_coercion_binary_rule!( - DataType::Binary, - DataType::Utf8, - Operator::Eq, - DataType::Binary - ); - - // LargeBinary - test_coercion_binary_rule!( - DataType::LargeBinary, - DataType::LargeBinary, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::Binary, - DataType::LargeBinary, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::LargeBinary, - DataType::Binary, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::Utf8, - DataType::LargeBinary, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::LargeBinary, - DataType::Utf8, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::LargeUtf8, - DataType::LargeBinary, - Operator::Eq, - DataType::LargeBinary - ); - test_coercion_binary_rule!( - DataType::LargeBinary, - DataType::LargeUtf8, - Operator::Eq, - DataType::LargeBinary - ); - - // Timestamps - let utc: Option<Arc<str>> = Some("UTC".into()); - test_coercion_binary_rule!( - 
DataType::Timestamp(TimeUnit::Second, utc.clone()), - DataType::Timestamp(TimeUnit::Second, utc.clone()), - Operator::Eq, - DataType::Timestamp(TimeUnit::Second, utc.clone()) - ); - test_coercion_binary_rule!( - DataType::Timestamp(TimeUnit::Second, utc.clone()), - DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), - Operator::Eq, - DataType::Timestamp(TimeUnit::Second, utc.clone()) - ); - test_coercion_binary_rule!( - DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())), - DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), - Operator::Eq, - DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())) - ); - test_coercion_binary_rule!( - DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), - DataType::Timestamp(TimeUnit::Second, utc), - Operator::Eq, - DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())) - ); - - // list - let inner_field = Arc::new(Field::new_list_field(DataType::Int64, true)); - test_coercion_binary_rule!( - DataType::List(Arc::clone(&inner_field)), - DataType::List(Arc::clone(&inner_field)), - Operator::Eq, - DataType::List(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::List(Arc::clone(&inner_field)), - DataType::LargeList(Arc::clone(&inner_field)), - Operator::Eq, - DataType::LargeList(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::LargeList(Arc::clone(&inner_field)), - DataType::List(Arc::clone(&inner_field)), - Operator::Eq, - DataType::LargeList(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::LargeList(Arc::clone(&inner_field)), - DataType::LargeList(Arc::clone(&inner_field)), - Operator::Eq, - DataType::LargeList(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - Operator::Eq, - DataType::FixedSizeList(Arc::clone(&inner_field), 10) - ); - 
test_coercion_binary_rule!( - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - DataType::LargeList(Arc::clone(&inner_field)), - Operator::Eq, - DataType::LargeList(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::LargeList(Arc::clone(&inner_field)), - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - Operator::Eq, - DataType::LargeList(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::List(Arc::clone(&inner_field)), - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - Operator::Eq, - DataType::List(Arc::clone(&inner_field)) - ); - test_coercion_binary_rule!( - DataType::FixedSizeList(Arc::clone(&inner_field), 10), - DataType::List(Arc::clone(&inner_field)), - Operator::Eq, - DataType::List(Arc::clone(&inner_field)) - ); - - // Negative test: inner_timestamp_field and inner_field are not compatible because their inner types are not compatible - let inner_timestamp_field = Arc::new(Field::new_list_field( - DataType::Timestamp(TimeUnit::Microsecond, None), - true, - )); - let result_type = BinaryTypeCoercer::new( - &DataType::List(Arc::clone(&inner_field)), - &Operator::Eq, - &DataType::List(Arc::clone(&inner_timestamp_field)), - ) - .get_input_types(); - assert!(result_type.is_err()); - - // TODO add other data type - Ok(()) - } - - #[test] - fn test_list_coercion() { - let lhs_type = DataType::List(Arc::new(Field::new("lhs", DataType::Int8, false))); - - let rhs_type = DataType::List(Arc::new(Field::new("rhs", DataType::Int64, true))); - - let coerced_type = list_coercion(&lhs_type, &rhs_type).unwrap(); - assert_eq!( - coerced_type, - DataType::List(Arc::new(Field::new("lhs", DataType::Int64, true))) - ); // nullable because the RHS is nullable - } - - #[test] - fn test_type_coercion_logical_op() -> Result<()> { - test_coercion_binary_rule!( - DataType::Boolean, - DataType::Boolean, - Operator::And, - DataType::Boolean - ); - - test_coercion_binary_rule!( - DataType::Boolean, - DataType::Boolean, - 
Operator::Or, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Boolean, - DataType::Null, - Operator::And, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Boolean, - DataType::Null, - Operator::Or, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Null, - DataType::Null, - Operator::Or, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Null, - DataType::Null, - Operator::And, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Null, - DataType::Boolean, - Operator::And, - DataType::Boolean - ); - test_coercion_binary_rule!( - DataType::Null, - DataType::Boolean, - Operator::Or, - DataType::Boolean - ); - Ok(()) - } - - #[test] - fn test_map_coercion() -> Result<()> { - let lhs = Field::new_map( - "lhs", - "entries", - Arc::new(Field::new("keys", DataType::Utf8, false)), - Arc::new(Field::new("values", DataType::LargeUtf8, false)), - true, - false, - ); - let rhs = Field::new_map( - "rhs", - "kvp", - Arc::new(Field::new("k", DataType::Utf8, false)), - Arc::new(Field::new("v", DataType::Utf8, true)), - false, - true, - ); - - let expected = Field::new_map( - "expected", - "entries", // struct coercion takes lhs name - Arc::new(Field::new( - "keys", // struct coercion takes lhs name - DataType::Utf8, - false, - )), - Arc::new(Field::new( - "values", // struct coercion takes lhs name - DataType::LargeUtf8, // lhs is large string - true, // rhs is nullable - )), - false, // both sides must be sorted - true, // rhs is nullable - ); - - test_coercion_binary_rule!( - lhs.data_type(), - rhs.data_type(), - Operator::Eq, - expected.data_type().clone() - ); - Ok(()) - } -} +mod tests; diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs b/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs new file mode 100644 index 0000000000000..fdd41ae2bb47f --- /dev/null +++ b/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs @@ -0,0 +1,250 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::*; +use datafusion_common::assert_contains; + +#[test] +fn test_coercion_error() -> Result<()> { + let coercer = + BinaryTypeCoercer::new(&DataType::Float32, &Operator::Plus, &DataType::Utf8); + let result_type = coercer.get_input_types(); + + let e = result_type.unwrap_err(); + assert_eq!(e.strip_backtrace(), "Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types"); + Ok(()) +} + +#[test] +fn test_date_timestamp_arithmetic_error() -> Result<()> { + let (lhs, rhs) = BinaryTypeCoercer::new( + &DataType::Timestamp(TimeUnit::Nanosecond, None), + &Operator::Minus, + &DataType::Timestamp(TimeUnit::Millisecond, None), + ) + .get_input_types()?; + assert_eq!(lhs.to_string(), "Timestamp(Millisecond, None)"); + assert_eq!(rhs.to_string(), "Timestamp(Millisecond, None)"); + + let err = + BinaryTypeCoercer::new(&DataType::Date32, &Operator::Plus, &DataType::Date64) + .get_input_types() + .unwrap_err() + .to_string(); + + assert_contains!( + &err, + "Cannot get result type for temporal operation Date64 + Date64" + ); + + Ok(()) +} + +#[test] +fn test_decimal_mathematics_op_type() { + assert_eq!( + 
coerce_numeric_type_to_decimal(&DataType::Int8).unwrap(), + DataType::Decimal128(3, 0) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Int16).unwrap(), + DataType::Decimal128(5, 0) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Int32).unwrap(), + DataType::Decimal128(10, 0) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Int64).unwrap(), + DataType::Decimal128(20, 0) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Float16).unwrap(), + DataType::Decimal128(6, 3) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Float32).unwrap(), + DataType::Decimal128(14, 7) + ); + assert_eq!( + coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(), + DataType::Decimal128(30, 15) + ); +} + +#[test] +fn test_type_coercion_arithmetic() -> Result<()> { + use DataType::*; + + // (Float64, _) | (_, Float64) => Some(Float64) + test_coercion_binary_rule_multiple!( + Float64, + [ + Float64, Float32, Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, Int8, + UInt8 + ], + Operator::Plus, + Float64 + ); + // (_, Float32) | (Float32, _) => Some(Float32) + test_coercion_binary_rule_multiple!( + Float32, + [Float32, Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, Int8, UInt8], + Operator::Plus, + Float32 + ); + // (_, Float16) | (Float16, _) => Some(Float16) + test_coercion_binary_rule_multiple!( + Float16, + [Float16, Int64, UInt64, Int32, UInt32, Int16, UInt16, Int8, UInt8], + Operator::Plus, + Float16 + ); + // (UInt64, Int64 | Int32 | Int16 | Int8) | (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)) + test_coercion_binary_rule_multiple!( + UInt64, + [Int64, Int32, Int16, Int8], + Operator::Divide, + Decimal128(20, 0) + ); + // (UInt64, _) | (_, UInt64) => Some(UInt64) + test_coercion_binary_rule_multiple!( + UInt64, + [UInt64, UInt32, UInt16, UInt8], + Operator::Modulo, + UInt64 + ); + // (Int64, _) | (_, Int64) => Some(Int64) + test_coercion_binary_rule_multiple!( + Int64, + [Int64, 
Int32, UInt32, Int16, UInt16, Int8, UInt8], + Operator::Modulo, + Int64 + ); + // (UInt32, Int32 | Int16 | Int8) | (Int32 | Int16 | Int8, UInt32) => Some(Int64) + test_coercion_binary_rule_multiple!( + UInt32, + [Int32, Int16, Int8], + Operator::Modulo, + Int64 + ); + // (UInt32, _) | (_, UInt32) => Some(UInt32) + test_coercion_binary_rule_multiple!( + UInt32, + [UInt32, UInt16, UInt8], + Operator::Modulo, + UInt32 + ); + // (Int32, _) | (_, Int32) => Some(Int32) + test_coercion_binary_rule_multiple!( + Int32, + [Int32, Int16, Int8], + Operator::Modulo, + Int32 + ); + // (UInt16, Int16 | Int8) | (Int16 | Int8, UInt16) => Some(Int32) + test_coercion_binary_rule_multiple!(UInt16, [Int16, Int8], Operator::Minus, Int32); + // (UInt16, _) | (_, UInt16) => Some(UInt16) + test_coercion_binary_rule_multiple!( + UInt16, + [UInt16, UInt8, UInt8], + Operator::Plus, + UInt16 + ); + // (Int16, _) | (_, Int16) => Some(Int16) + test_coercion_binary_rule_multiple!(Int16, [Int16, Int8], Operator::Plus, Int16); + // (UInt8, Int8) | (Int8, UInt8) => Some(Int16) + test_coercion_binary_rule!(Int8, UInt8, Operator::Minus, Int16); + test_coercion_binary_rule!(UInt8, Int8, Operator::Multiply, Int16); + // (UInt8, _) | (_, UInt8) => Some(UInt8) + test_coercion_binary_rule!(UInt8, UInt8, Operator::Minus, UInt8); + // (Int8, _) | (_, Int8) => Some(Int8) + test_coercion_binary_rule!(Int8, Int8, Operator::Plus, Int8); + + Ok(()) +} + +fn test_math_decimal_coercion_rule( + lhs_type: DataType, + rhs_type: DataType, + expected_lhs_type: DataType, + expected_rhs_type: DataType, +) { + let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap(); + assert_eq!(lhs_type, expected_lhs_type); + assert_eq!(rhs_type, expected_rhs_type); +} + +#[test] +fn test_coercion_arithmetic_decimal() -> Result<()> { + test_math_decimal_coercion_rule( + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 2), + ); + + 
test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + + test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + + test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + + test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + + test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + + test_math_decimal_coercion_rule( + DataType::UInt32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + ); + test_math_decimal_coercion_rule( + DataType::Decimal128(10, 2), + DataType::UInt32, + DataType::Decimal128(10, 2), + DataType::Decimal128(10, 0), + ); + + Ok(()) +} diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs b/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs new file mode 100644 index 0000000000000..208edae4ffc28 --- /dev/null +++ b/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs @@ -0,0 +1,699 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::*; + +#[test] +fn test_decimal_binary_comparison_coercion() -> Result<()> { + let input_decimal = DataType::Decimal128(20, 3); + let input_types = [ + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::Float32, + DataType::Float64, + DataType::Decimal128(38, 10), + DataType::Decimal128(20, 8), + DataType::Null, + ]; + let result_types = [ + DataType::Decimal128(20, 3), + DataType::Decimal128(20, 3), + DataType::Decimal128(20, 3), + DataType::Decimal128(23, 3), + DataType::Decimal128(24, 7), + DataType::Decimal128(32, 15), + DataType::Decimal128(38, 10), + DataType::Decimal128(25, 8), + DataType::Decimal128(20, 3), + ]; + let comparison_op_types = [ + Operator::NotEq, + Operator::Eq, + Operator::Gt, + Operator::GtEq, + Operator::Lt, + Operator::LtEq, + ]; + for (i, input_type) in input_types.iter().enumerate() { + let expect_type = &result_types[i]; + for op in comparison_op_types { + let (lhs, rhs) = BinaryTypeCoercer::new(&input_decimal, &op, input_type) + .get_input_types()?; + assert_eq!(expect_type, &lhs); + assert_eq!(expect_type, &rhs); + } + } + // negative test + let result_type = + BinaryTypeCoercer::new(&input_decimal, &Operator::Eq, &DataType::Boolean) + .get_input_types(); + assert!(result_type.is_err()); + Ok(()) +} + +#[test] +fn test_like_coercion() { + // string coerce to strings + test_like_rule!(DataType::Utf8, DataType::Utf8, Some(DataType::Utf8)); + test_like_rule!( + DataType::LargeUtf8, + DataType::Utf8, + Some(DataType::LargeUtf8) + ); + test_like_rule!( + DataType::Utf8, + 
DataType::LargeUtf8, + Some(DataType::LargeUtf8) + ); + test_like_rule!( + DataType::LargeUtf8, + DataType::LargeUtf8, + Some(DataType::LargeUtf8) + ); + + // Also coerce binary to strings + test_like_rule!(DataType::Binary, DataType::Utf8, Some(DataType::Utf8)); + test_like_rule!( + DataType::LargeBinary, + DataType::Utf8, + Some(DataType::LargeUtf8) + ); + test_like_rule!( + DataType::Binary, + DataType::LargeUtf8, + Some(DataType::LargeUtf8) + ); + test_like_rule!( + DataType::LargeBinary, + DataType::LargeUtf8, + Some(DataType::LargeUtf8) + ); +} + +#[test] +fn test_type_coercion() -> Result<()> { + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Date32, + Operator::Eq, + DataType::Date32 + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Date64, + Operator::Lt, + DataType::Date64 + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time32(TimeUnit::Second), + Operator::Eq, + DataType::Time32(TimeUnit::Second) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time32(TimeUnit::Millisecond), + Operator::Eq, + DataType::Time32(TimeUnit::Millisecond) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time64(TimeUnit::Microsecond), + Operator::Eq, + DataType::Time64(TimeUnit::Microsecond) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time64(TimeUnit::Nanosecond), + Operator::Eq, + DataType::Time64(TimeUnit::Nanosecond) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Timestamp(TimeUnit::Second, None), + Operator::Lt, + DataType::Timestamp(TimeUnit::Nanosecond, None) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Timestamp(TimeUnit::Millisecond, None), + Operator::Lt, + DataType::Timestamp(TimeUnit::Nanosecond, None) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Timestamp(TimeUnit::Microsecond, None), + Operator::Lt, + DataType::Timestamp(TimeUnit::Nanosecond, None) + ); + test_coercion_binary_rule!( + DataType::Utf8, + 
DataType::Timestamp(TimeUnit::Nanosecond, None), + Operator::Lt, + DataType::Timestamp(TimeUnit::Nanosecond, None) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8, + Operator::RegexMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8View, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8View, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8, + Operator::RegexNotMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8View, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8View, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8, + Operator::RegexNotIMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8, + Operator::RegexNotIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Utf8View, + Operator::RegexNotIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Utf8View, + DataType::Utf8View, + Operator::RegexNotIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8, + Operator::RegexMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8View, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), 
DataType::Utf8View.into()), + DataType::Utf8, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8View, + Operator::RegexMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8, + Operator::RegexIMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8, + Operator::RegexIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8View, + Operator::RegexIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8View, + Operator::RegexIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8, + Operator::RegexNotMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8View, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8View, + Operator::RegexNotMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8, + Operator::RegexNotIMatch, + DataType::Utf8 + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8, + Operator::RegexNotIMatch, + 
DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()), + DataType::Utf8View, + Operator::RegexNotIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Dictionary(DataType::Int32.into(), DataType::Utf8View.into()), + DataType::Utf8View, + Operator::RegexNotIMatch, + DataType::Utf8View + ); + test_coercion_binary_rule!( + DataType::Int16, + DataType::Int64, + Operator::BitwiseAnd, + DataType::Int64 + ); + test_coercion_binary_rule!( + DataType::UInt64, + DataType::UInt64, + Operator::BitwiseAnd, + DataType::UInt64 + ); + test_coercion_binary_rule!( + DataType::Int8, + DataType::UInt32, + Operator::BitwiseAnd, + DataType::Int64 + ); + test_coercion_binary_rule!( + DataType::UInt32, + DataType::Int32, + Operator::BitwiseAnd, + DataType::Int64 + ); + test_coercion_binary_rule!( + DataType::UInt16, + DataType::Int16, + Operator::BitwiseAnd, + DataType::Int32 + ); + test_coercion_binary_rule!( + DataType::UInt32, + DataType::UInt32, + Operator::BitwiseAnd, + DataType::UInt32 + ); + test_coercion_binary_rule!( + DataType::UInt16, + DataType::UInt32, + Operator::BitwiseAnd, + DataType::UInt32 + ); + Ok(()) +} + +#[test] +fn test_type_coercion_compare() -> Result<()> { + // boolean + test_coercion_binary_rule!( + DataType::Boolean, + DataType::Boolean, + Operator::Eq, + DataType::Boolean + ); + // float + test_coercion_binary_rule!( + DataType::Float16, + DataType::Int64, + Operator::Eq, + DataType::Float16 + ); + test_coercion_binary_rule!( + DataType::Float16, + DataType::Float64, + Operator::Eq, + DataType::Float64 + ); + test_coercion_binary_rule!( + DataType::Float32, + DataType::Int64, + Operator::Eq, + DataType::Float32 + ); + test_coercion_binary_rule!( + DataType::Float32, + DataType::Float64, + Operator::GtEq, + DataType::Float64 + ); + // signed integer + test_coercion_binary_rule!( + DataType::Int8, + DataType::Int32, + Operator::LtEq, + DataType::Int32 + ); + 
test_coercion_binary_rule!( + DataType::Int64, + DataType::Int32, + Operator::LtEq, + DataType::Int64 + ); + // unsigned integer + test_coercion_binary_rule!( + DataType::UInt32, + DataType::UInt8, + Operator::Gt, + DataType::UInt32 + ); + test_coercion_binary_rule!( + DataType::UInt64, + DataType::UInt8, + Operator::Eq, + DataType::UInt64 + ); + test_coercion_binary_rule!( + DataType::UInt64, + DataType::Int64, + Operator::Eq, + DataType::Decimal128(20, 0) + ); + // numeric/decimal + test_coercion_binary_rule!( + DataType::Int64, + DataType::Decimal128(10, 0), + Operator::Eq, + DataType::Decimal128(20, 0) + ); + test_coercion_binary_rule!( + DataType::Int64, + DataType::Decimal128(10, 2), + Operator::Lt, + DataType::Decimal128(22, 2) + ); + test_coercion_binary_rule!( + DataType::Float64, + DataType::Decimal128(10, 3), + Operator::Gt, + DataType::Decimal128(30, 15) + ); + test_coercion_binary_rule!( + DataType::Int64, + DataType::Decimal128(10, 0), + Operator::Eq, + DataType::Decimal128(20, 0) + ); + test_coercion_binary_rule!( + DataType::Decimal128(14, 2), + DataType::Decimal128(10, 3), + Operator::GtEq, + DataType::Decimal128(15, 3) + ); + test_coercion_binary_rule!( + DataType::UInt64, + DataType::Decimal128(20, 0), + Operator::Eq, + DataType::Decimal128(20, 0) + ); + + // Binary + test_coercion_binary_rule!( + DataType::Binary, + DataType::Binary, + Operator::Eq, + DataType::Binary + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Binary, + Operator::Eq, + DataType::Binary + ); + test_coercion_binary_rule!( + DataType::Binary, + DataType::Utf8, + Operator::Eq, + DataType::Binary + ); + + // LargeBinary + test_coercion_binary_rule!( + DataType::LargeBinary, + DataType::LargeBinary, + Operator::Eq, + DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::Binary, + DataType::LargeBinary, + Operator::Eq, + DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::LargeBinary, + DataType::Binary, + Operator::Eq, + 
DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::LargeBinary, + Operator::Eq, + DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::LargeBinary, + DataType::Utf8, + Operator::Eq, + DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::LargeUtf8, + DataType::LargeBinary, + Operator::Eq, + DataType::LargeBinary + ); + test_coercion_binary_rule!( + DataType::LargeBinary, + DataType::LargeUtf8, + Operator::Eq, + DataType::LargeBinary + ); + + // Timestamps + let utc: Option<Arc<str>> = Some("UTC".into()); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, utc.clone()), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, utc.clone()) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, utc.clone()) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())), + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, Some("America/New_York".into())) + ); + test_coercion_binary_rule!( + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), + DataType::Timestamp(TimeUnit::Second, utc), + Operator::Eq, + DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())) + ); + + // list + let inner_field = Arc::new(Field::new_list_field(DataType::Int64, true)); + test_coercion_binary_rule!( + DataType::List(Arc::clone(&inner_field)), + DataType::List(Arc::clone(&inner_field)), + Operator::Eq, + DataType::List(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::List(Arc::clone(&inner_field)), + DataType::LargeList(Arc::clone(&inner_field)), + Operator::Eq, + DataType::LargeList(Arc::clone(&inner_field)) + ); + 
test_coercion_binary_rule!( + DataType::LargeList(Arc::clone(&inner_field)), + DataType::List(Arc::clone(&inner_field)), + Operator::Eq, + DataType::LargeList(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::LargeList(Arc::clone(&inner_field)), + DataType::LargeList(Arc::clone(&inner_field)), + Operator::Eq, + DataType::LargeList(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + Operator::Eq, + DataType::FixedSizeList(Arc::clone(&inner_field), 10) + ); + test_coercion_binary_rule!( + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + DataType::LargeList(Arc::clone(&inner_field)), + Operator::Eq, + DataType::LargeList(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::LargeList(Arc::clone(&inner_field)), + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + Operator::Eq, + DataType::LargeList(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::List(Arc::clone(&inner_field)), + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + Operator::Eq, + DataType::List(Arc::clone(&inner_field)) + ); + test_coercion_binary_rule!( + DataType::FixedSizeList(Arc::clone(&inner_field), 10), + DataType::List(Arc::clone(&inner_field)), + Operator::Eq, + DataType::List(Arc::clone(&inner_field)) + ); + + let inner_timestamp_field = Arc::new(Field::new_list_field( + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + )); + let result_type = BinaryTypeCoercer::new( + &DataType::List(Arc::clone(&inner_field)), + &Operator::Eq, + &DataType::List(Arc::clone(&inner_timestamp_field)), + ) + .get_input_types(); + assert!(result_type.is_err()); + + Ok(()) +} + +#[test] +fn test_list_coercion() { + let lhs_type = DataType::List(Arc::new(Field::new("lhs", DataType::Int8, false))); + + let rhs_type = DataType::List(Arc::new(Field::new("rhs", DataType::Int64, true))); + + let 
coerced_type = list_coercion(&lhs_type, &rhs_type).unwrap(); + assert_eq!( + coerced_type, + DataType::List(Arc::new(Field::new("lhs", DataType::Int64, true))) + ); +} + +#[test] +fn test_map_coercion() -> Result<()> { + let lhs = Field::new_map( + "lhs", + "entries", + Arc::new(Field::new("keys", DataType::Utf8, false)), + Arc::new(Field::new("values", DataType::LargeUtf8, false)), + true, + false, + ); + let rhs = Field::new_map( + "rhs", + "kvp", + Arc::new(Field::new("k", DataType::Utf8, false)), + Arc::new(Field::new("v", DataType::Utf8, true)), + false, + true, + ); + + let expected = Field::new_map( + "expected", + "entries", + Arc::new(Field::new("keys", DataType::Utf8, false)), + Arc::new(Field::new("values", DataType::LargeUtf8, true)), + false, + true, + ); + + test_coercion_binary_rule!( + lhs.data_type(), + rhs.data_type(), + Operator::Eq, + expected.data_type().clone() + ); + Ok(()) +} diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/dictionary.rs b/datafusion/expr-common/src/type_coercion/binary/tests/dictionary.rs new file mode 100644 index 0000000000000..0fb56a4a2c536 --- /dev/null +++ b/datafusion/expr-common/src/type_coercion/binary/tests/dictionary.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::*; + +#[test] +fn test_dictionary_type_coercion() { + use DataType::*; + + let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32)); + let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Int32) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Int32) + ); + + // Since values of Int16 can be coerced to Utf8, we can support this + let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Utf8) + ); + + // Since values of Utf8 can be coerced to Binary, we can support this + let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Binary) + ); + + let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + let rhs_type = Utf8; + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(lhs_type.clone()) + ); + + let lhs_type = Utf8; + let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(rhs_type.clone()) + ); +} diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/mod.rs b/datafusion/expr-common/src/type_coercion/binary/tests/mod.rs new file mode 100644 index 0000000000000..6d21d795e4b72 --- /dev/null +++ b/datafusion/expr-common/src/type_coercion/binary/tests/mod.rs @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation 
(ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::*; + +// Common test macros + +/// Tests that coercion for a binary operator between two types yields the expected result type for both sides. +/// +/// Usage: test_coercion_binary_rule!(lhs_type, rhs_type, op, expected_type) +/// - lhs_type: The left-hand side data type +/// - rhs_type: The right-hand side data type +/// - op: The binary operator (e.g., "+", "-", etc.) +/// - expected_type: The type both sides should be coerced to +macro_rules! test_coercion_binary_rule { + ($LHS_TYPE:expr, $RHS_TYPE:expr, $OP:expr, $RESULT_TYPE:expr) => {{ + let (lhs, rhs) = + BinaryTypeCoercer::new(&$LHS_TYPE, &$OP, &$RHS_TYPE).get_input_types()?; + assert_eq!(lhs, $RESULT_TYPE); + assert_eq!(rhs, $RESULT_TYPE); + }}; +} + +/// Tests that coercion for a binary operator between one type and multiple right-hand side types +/// yields the expected result type for both sides, in both lhs/rhs and rhs/lhs order. +/// +/// Usage: test_coercion_binary_rule_multiple!(lhs_type, rhs_types, op, expected_type) +/// - lhs_type: The left-hand side data type +/// - rhs_types: An iterable of right-hand side data types +/// - op: The binary operator +/// - expected_type: The type both sides should be coerced to +macro_rules! 
test_coercion_binary_rule_multiple { + ($LHS_TYPE:expr, $RHS_TYPES:expr, $OP:expr, $RESULT_TYPE:expr) => {{ + for rh_type in $RHS_TYPES { + let (lhs, rhs) = + BinaryTypeCoercer::new(&$LHS_TYPE, &$OP, &rh_type).get_input_types()?; + assert_eq!(lhs, $RESULT_TYPE); + assert_eq!(rhs, $RESULT_TYPE); + + BinaryTypeCoercer::new(&rh_type, &$OP, &$LHS_TYPE).get_input_types()?; + assert_eq!(lhs, $RESULT_TYPE); + assert_eq!(rhs, $RESULT_TYPE); + } + }}; +} + +/// Tests that the like_coercion function returns the expected result type for both lhs/rhs and rhs/lhs order. +/// +/// Usage: test_like_rule!(lhs_type, rhs_type, expected_type) +/// - lhs_type: The left-hand side data type +/// - rhs_type: The right-hand side data type +/// - expected_type: The expected result type from like_coercion +macro_rules! test_like_rule { + ($LHS_TYPE:expr, $RHS_TYPE:expr, $RESULT_TYPE:expr) => {{ + let result = like_coercion(&$LHS_TYPE, &$RHS_TYPE); + assert_eq!(result, $RESULT_TYPE); + let result = like_coercion(&$RHS_TYPE, &$LHS_TYPE); + assert_eq!(result, $RESULT_TYPE); + }}; +} + +mod arithmetic; +mod comparison; +mod dictionary; +mod null_coercion; diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/null_coercion.rs b/datafusion/expr-common/src/type_coercion/binary/tests/null_coercion.rs new file mode 100644 index 0000000000000..91c826b563c7c --- /dev/null +++ b/datafusion/expr-common/src/type_coercion/binary/tests/null_coercion.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::*; + +#[test] +fn test_type_coercion_logical_op() -> Result<()> { + test_coercion_binary_rule!( + DataType::Boolean, + DataType::Boolean, + Operator::And, + DataType::Boolean + ); + + test_coercion_binary_rule!( + DataType::Boolean, + DataType::Boolean, + Operator::Or, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Boolean, + DataType::Null, + Operator::And, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Boolean, + DataType::Null, + Operator::Or, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Null, + DataType::Null, + Operator::Or, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Null, + DataType::Null, + Operator::And, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Null, + DataType::Boolean, + Operator::And, + DataType::Boolean + ); + test_coercion_binary_rule!( + DataType::Null, + DataType::Boolean, + Operator::Or, + DataType::Boolean + ); + Ok(()) +} diff --git a/datafusion/expr/src/async_udf.rs b/datafusion/expr/src/async_udf.rs index d900c16345230..a62d4d5341f08 100644 --- a/datafusion/expr/src/async_udf.rs +++ b/datafusion/expr/src/async_udf.rs @@ -15,17 +15,20 @@ // specific language governing permissions and limitations // under the License. 
-use crate::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl}; +use crate::utils::{arc_ptr_eq, arc_ptr_hash}; +use crate::{ + udf_equals_hash, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, +}; use arrow::array::ArrayRef; use arrow::datatypes::{DataType, FieldRef}; use async_trait::async_trait; -use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_common::internal_err; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_expr_common::signature::Signature; use std::any::Any; use std::fmt::{Debug, Display}; +use std::hash::{Hash, Hasher}; use std::sync::Arc; /// A scalar UDF that can invoke using async methods @@ -45,11 +48,7 @@ pub trait AsyncScalarUDFImpl: ScalarUDFImpl { } /// Invoke the function asynchronously with the async arguments - async fn invoke_async_with_args( - &self, - args: ScalarFunctionArgs, - option: &ConfigOptions, - ) -> Result; + async fn invoke_async_with_args(&self, args: ScalarFunctionArgs) -> Result; } /// A scalar UDF that must be invoked using async methods @@ -61,6 +60,21 @@ pub struct AsyncScalarUDF { inner: Arc, } +impl PartialEq for AsyncScalarUDF { + fn eq(&self, other: &Self) -> bool { + let Self { inner } = self; + // TODO when MSRV >= 1.86.0, switch to `inner.equals(other.inner.as_ref())` leveraging trait upcasting. 
+ arc_ptr_eq(inner, &other.inner) + } +} + +impl Hash for AsyncScalarUDF { + fn hash(&self, state: &mut H) { + let Self { inner } = self; + arc_ptr_hash(inner, state); + } +} + impl AsyncScalarUDF { pub fn new(inner: Arc) -> Self { Self { inner } @@ -81,9 +95,8 @@ impl AsyncScalarUDF { pub async fn invoke_async_with_args( &self, args: ScalarFunctionArgs, - option: &ConfigOptions, ) -> Result { - self.inner.invoke_async_with_args(args, option).await + self.inner.invoke_async_with_args(args).await } } @@ -111,6 +124,8 @@ impl ScalarUDFImpl for AsyncScalarUDF { fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { internal_err!("async functions should not be called directly") } + + udf_equals_hash!(ScalarUDFImpl); } impl Display for AsyncScalarUDF { diff --git a/datafusion/expr/src/execution_props.rs b/datafusion/expr/src/execution_props.rs index d6418247db760..46badcd3ece78 100644 --- a/datafusion/expr/src/execution_props.rs +++ b/datafusion/expr/src/execution_props.rs @@ -118,6 +118,6 @@ mod test { #[test] fn debug() { let props = ExecutionProps::new(); - assert_eq!("ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, var_providers: None }", format!("{props:?}")); + assert_eq!("ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", format!("{props:?}")); } } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c50268d99676f..0749ff0e98b71 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -27,8 +27,8 @@ use std::sync::Arc; use crate::expr_fn::binary_expr; use crate::function::WindowFunctionSimplification; use crate::logical_plan::Subquery; -use crate::Volatility; -use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; +use crate::{AggregateUDF, Volatility}; +use crate::{ExprSchemable, Operator, Signature, 
WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; @@ -982,7 +982,7 @@ impl<'a> TreeNodeContainer<'a, Expr> for Sort { #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct AggregateFunction { /// Name of the function - pub func: Arc, + pub func: Arc, pub params: AggregateFunctionParams, } @@ -994,18 +994,18 @@ pub struct AggregateFunctionParams { /// Optional filter pub filter: Option>, /// Optional ordering - pub order_by: Option>, + pub order_by: Vec, pub null_treatment: Option, } impl AggregateFunction { /// Create a new AggregateFunction expression with a user-defined function (UDF) pub fn new_udf( - func: Arc, + func: Arc, args: Vec, distinct: bool, filter: Option>, - order_by: Option>, + order_by: Vec, null_treatment: Option, ) -> Self { Self { @@ -1029,7 +1029,7 @@ impl AggregateFunction { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum WindowFunctionDefinition { /// A user defined aggregate function - AggregateUDF(Arc), + AggregateUDF(Arc), /// A user defined aggregate function WindowUDF(Arc), } @@ -1088,8 +1088,8 @@ impl Display for WindowFunctionDefinition { } } -impl From> for WindowFunctionDefinition { - fn from(value: Arc) -> Self { +impl From> for WindowFunctionDefinition { + fn from(value: Arc) -> Self { Self::AggregateUDF(value) } } @@ -1173,38 +1173,6 @@ impl Exists { } } -/// User Defined Aggregate Function -/// -/// See [`udaf::AggregateUDF`] for more information. 
-#[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub struct AggregateUDF { - /// The function - pub fun: Arc, - /// List of expressions to feed to the functions as arguments - pub args: Vec, - /// Optional filter - pub filter: Option>, - /// Optional ORDER BY applied prior to aggregating - pub order_by: Option>, -} - -impl AggregateUDF { - /// Create a new AggregateUDF expression - pub fn new( - fun: Arc, - args: Vec, - filter: Option>, - order_by: Option>, - ) -> Self { - Self { - fun, - args, - filter, - order_by, - } - } -} - /// InList expression #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct InList { @@ -2303,18 +2271,15 @@ impl NormalizeEq for Expr { (None, None) => true, _ => false, } - && match (self_order_by, other_order_by) { - (Some(self_order_by), Some(other_order_by)) => self_order_by - .iter() - .zip(other_order_by.iter()) - .all(|(a, b)| { - a.asc == b.asc - && a.nulls_first == b.nulls_first - && a.expr.normalize_eq(&b.expr) - }), - (None, None) => true, - _ => false, - } + && self_order_by + .iter() + .zip(other_order_by.iter()) + .all(|(a, b)| { + a.asc == b.asc + && a.nulls_first == b.nulls_first + && a.expr.normalize_eq(&b.expr) + }) + && self_order_by.len() == other_order_by.len() } (Expr::WindowFunction(left), Expr::WindowFunction(other)) => { let WindowFunction { diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index e8885ed6b7240..1d8d183807b91 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -26,10 +26,11 @@ use crate::function::{ StateFieldsArgs, }; use crate::select_expr::SelectExpr; +use crate::utils::{arc_ptr_eq, arc_ptr_hash}; use crate::{ conditional_expressions::CaseBuilder, expr::Sort, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, ScalarFunctionArgs, - ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, + udf_equals_hash, AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, + 
ScalarFunctionArgs, ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; use crate::{ AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowFrame, WindowUDF, WindowUDFImpl, @@ -44,6 +45,7 @@ use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::ops::Not; use std::sync::Arc; @@ -408,6 +410,36 @@ pub struct SimpleScalarUDF { fun: ScalarFunctionImplementation, } +impl PartialEq for SimpleScalarUDF { + fn eq(&self, other: &Self) -> bool { + let Self { + name, + signature, + return_type, + fun, + } = self; + name == &other.name + && signature == &other.signature + && return_type == &other.return_type + && arc_ptr_eq(fun, &other.fun) + } +} + +impl Hash for SimpleScalarUDF { + fn hash(&self, state: &mut H) { + let Self { + name, + signature, + return_type, + fun, + } = self; + name.hash(state); + signature.hash(state); + return_type.hash(state); + arc_ptr_hash(fun, state); + } +} + impl Debug for SimpleScalarUDF { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("SimpleScalarUDF") @@ -474,6 +506,8 @@ impl ScalarUDFImpl for SimpleScalarUDF { fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { (self.fun)(&args.args) } + + udf_equals_hash!(ScalarUDFImpl); } /// Creates a new UDAF with a specific signature, state type and return type. 
@@ -594,6 +628,42 @@ impl AggregateUDFImpl for SimpleAggregateUDF { fn state_fields(&self, _args: StateFieldsArgs) -> Result> { Ok(self.state_fields.clone()) } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + signature, + return_type, + accumulator, + state_fields, + } = self; + name == &other.name + && signature == &other.signature + && return_type == &other.return_type + && Arc::ptr_eq(accumulator, &other.accumulator) + && state_fields == &other.state_fields + } + + fn hash_value(&self) -> u64 { + let Self { + name, + signature, + return_type, + accumulator, + state_fields, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + signature.hash(&mut hasher); + return_type.hash(&mut hasher); + Arc::as_ptr(accumulator).hash(&mut hasher); + state_fields.hash(&mut hasher); + hasher.finish() + } } /// Creates a new UDWF with a specific signature, state type and return type. 
@@ -686,6 +756,41 @@ impl WindowUDFImpl for SimpleWindowUDF { true, ))) } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + signature, + return_type, + partition_evaluator_factory, + } = self; + name == &other.name + && signature == &other.signature + && return_type == &other.return_type + && Arc::ptr_eq( + partition_evaluator_factory, + &other.partition_evaluator_factory, + ) + } + + fn hash_value(&self) -> u64 { + let Self { + name, + signature, + return_type, + partition_evaluator_factory, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + signature.hash(&mut hasher); + return_type.hash(&mut hasher); + Arc::as_ptr(partition_evaluator_factory).hash(&mut hasher); + hasher.finish() + } } pub fn interval_year_month_lit(value: &str) -> Expr { @@ -821,7 +926,7 @@ impl ExprFuncBuilder { let fun_expr = match fun { ExprFuncKind::Aggregate(mut udaf) => { - udaf.params.order_by = order_by; + udaf.params.order_by = order_by.unwrap_or_default(); udaf.params.filter = filter.map(Box::new); udaf.params.distinct = distinct; udaf.params.null_treatment = null_treatment; diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index abede09794896..1ab5ffa758421 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -48,16 +48,14 @@ use crate::{ }; use super::dml::InsertOp; -use super::plan::ColumnUnnestList; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::file_options::file_type::FileType; use datafusion_common::{ - exec_err, get_target_functional_dependencies, internal_err, not_impl_err, - plan_datafusion_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, - 
DataFusionError, NullEquality, Result, ScalarValue, TableReference, ToDFSchema, - UnnestOptions, + exec_err, get_target_functional_dependencies, not_impl_err, plan_datafusion_err, + plan_err, Column, Constraints, DFSchema, DFSchemaRef, DataFusionError, NullEquality, + Result, ScalarValue, TableReference, ToDFSchema, UnnestOptions, }; use datafusion_expr_common::type_coercion::binary::type_union_resolution; @@ -407,13 +405,13 @@ impl LogicalPlanBuilder { options: HashMap, partition_by: Vec, ) -> Result { - Ok(Self::new(LogicalPlan::Copy(CopyTo { - input: Arc::new(input), + Ok(Self::new(LogicalPlan::Copy(CopyTo::new( + Arc::new(input), output_url, partition_by, file_type, options, - }))) + )))) } /// Create a [`DmlStatement`] for inserting the contents of this builder into the named table. @@ -1675,6 +1673,38 @@ pub fn build_join_schema( dfschema.with_functional_dependencies(func_dependencies) } +/// (Re)qualify the sides of a join if needed, i.e. if the columns from one side would otherwise +/// conflict with the columns from the other. +/// This is especially useful for queries that come as Substrait, since Substrait doesn't currently allow specifying +/// aliases, neither for columns nor for tables. DataFusion requires columns to be uniquely identifiable, in some +/// places (see e.g. DFSchema::check_names). 
+/// The function returns: +/// - The requalified or original left logical plan +/// - The requalified or original right logical plan +/// - If a requalification was needed or not +pub fn requalify_sides_if_needed( + left: LogicalPlanBuilder, + right: LogicalPlanBuilder, +) -> Result<(LogicalPlanBuilder, LogicalPlanBuilder, bool)> { + let left_cols = left.schema().columns(); + let right_cols = right.schema().columns(); + if left_cols.iter().any(|l| { + right_cols.iter().any(|r| { + l == r || (l.name == r.name && (l.relation.is_none() || r.relation.is_none())) + }) + }) { + // These names have no connection to the original plan, but they'll make the columns + // (mostly) unique. + Ok(( + left.alias(TableReference::bare("left"))?, + right.alias(TableReference::bare("right"))?, + true, + )) + } else { + Ok((left, right, false)) + } +} + /// Add additional "synthetic" group by expressions based on functional /// dependencies. /// @@ -2057,27 +2087,6 @@ pub fn unnest(input: LogicalPlan, columns: Vec) -> Result { unnest_with_options(input, columns, UnnestOptions::default()) } -// Get the data type of a multi-dimensional type after unnesting it -// with a given depth -fn get_unnested_list_datatype_recursive( - data_type: &DataType, - depth: usize, -) -> Result { - match data_type { - DataType::List(field) - | DataType::FixedSizeList(field, _) - | DataType::LargeList(field) => { - if depth == 1 { - return Ok(field.data_type().clone()); - } - return get_unnested_list_datatype_recursive(field.data_type(), depth - 1); - } - _ => {} - }; - - internal_err!("trying to unnest on invalid data type {:?}", data_type) -} - pub fn get_struct_unnested_columns( col_name: &String, inner_fields: &Fields, @@ -2088,53 +2097,6 @@ pub fn get_struct_unnested_columns( .collect() } -// Based on data type, either struct or a variant of list -// return a set of columns as the result of unnesting -// the input columns. 
-// For example, given a column with name "a", -// - List(Element) returns ["a"] with data type Element -// - Struct(field1, field2) returns ["a.field1","a.field2"] -// For list data type, an argument depth is used to specify -// the recursion level -pub fn get_unnested_columns( - col_name: &String, - data_type: &DataType, - depth: usize, -) -> Result)>> { - let mut qualified_columns = Vec::with_capacity(1); - - match data_type { - DataType::List(_) | DataType::FixedSizeList(_, _) | DataType::LargeList(_) => { - let data_type = get_unnested_list_datatype_recursive(data_type, depth)?; - let new_field = Arc::new(Field::new( - col_name, data_type, - // Unnesting may produce NULLs even if the list is not null. - // For example: unnest([1], []) -> 1, null - true, - )); - let column = Column::from_name(col_name); - // let column = Column::from((None, &new_field)); - qualified_columns.push((column, new_field)); - } - DataType::Struct(fields) => { - qualified_columns.extend(fields.iter().map(|f| { - let new_name = format!("{}.{}", col_name, f.name()); - let column = Column::from_name(&new_name); - let new_field = f.as_ref().clone().with_name(new_name); - // let column = Column::from((None, &f)); - (column, Arc::new(new_field)) - })) - } - _ => { - return internal_err!( - "trying to unnest on invalid data type {:?}", - data_type - ); - } - }; - Ok(qualified_columns) -} - /// Create a [`LogicalPlan::Unnest`] plan with options /// This function receive a list of columns to be unnested /// because multiple unnest can be performed on the same column (e.g unnest with different depth) @@ -2169,126 +2131,11 @@ pub fn unnest_with_options( columns_to_unnest: Vec, options: UnnestOptions, ) -> Result { - let mut list_columns: Vec<(usize, ColumnUnnestList)> = vec![]; - let mut struct_columns = vec![]; - let indices_to_unnest = columns_to_unnest - .iter() - .map(|c| Ok((input.schema().index_of_column(c)?, c))) - .collect::>>()?; - - let input_schema = input.schema(); - - let mut 
dependency_indices = vec![]; - // Transform input schema into new schema - // Given this comprehensive example - // - // input schema: - // 1.col1_unnest_placeholder: list[list[int]], - // 2.col1: list[list[int]] - // 3.col2: list[int] - // with unnest on unnest(col1,depth=2), unnest(col1,depth=1) and unnest(col2,depth=1) - // output schema: - // 1.unnest_col1_depth_2: int - // 2.unnest_col1_depth_1: list[int] - // 3.col1: list[list[int]] - // 4.unnest_col2_depth_1: int - // Meaning the placeholder column will be replaced by its unnested variation(s), note - // the plural. - let fields = input_schema - .iter() - .enumerate() - .map(|(index, (original_qualifier, original_field))| { - match indices_to_unnest.get(&index) { - Some(column_to_unnest) => { - let recursions_on_column = options - .recursions - .iter() - .filter(|p| -> bool { &p.input_column == *column_to_unnest }) - .collect::>(); - let mut transformed_columns = recursions_on_column - .iter() - .map(|r| { - list_columns.push(( - index, - ColumnUnnestList { - output_column: r.output_column.clone(), - depth: r.depth, - }, - )); - Ok(get_unnested_columns( - &r.output_column.name, - original_field.data_type(), - r.depth, - )? 
- .into_iter() - .next() - .unwrap()) // because unnesting a list column always result into one result - }) - .collect::)>>>()?; - if transformed_columns.is_empty() { - transformed_columns = get_unnested_columns( - &column_to_unnest.name, - original_field.data_type(), - 1, - )?; - match original_field.data_type() { - DataType::Struct(_) => { - struct_columns.push(index); - } - DataType::List(_) - | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { - list_columns.push(( - index, - ColumnUnnestList { - output_column: Column::from_name( - &column_to_unnest.name, - ), - depth: 1, - }, - )); - } - _ => {} - }; - } - - // new columns dependent on the same original index - dependency_indices - .extend(std::iter::repeat_n(index, transformed_columns.len())); - Ok(transformed_columns - .iter() - .map(|(col, field)| (col.relation.to_owned(), field.to_owned())) - .collect()) - } - None => { - dependency_indices.push(index); - Ok(vec![( - original_qualifier.cloned(), - Arc::clone(original_field), - )]) - } - } - }) - .collect::>>()? 
- .into_iter() - .flatten() - .collect::>(); - - let metadata = input_schema.metadata().clone(); - let df_schema = DFSchema::new_with_metadata(fields, metadata)?; - // We can use the existing functional dependencies: - let deps = input_schema.functional_dependencies().clone(); - let schema = Arc::new(df_schema.with_functional_dependencies(deps)?); - - Ok(LogicalPlan::Unnest(Unnest { - input: Arc::new(input), - exec_columns: columns_to_unnest, - list_type_columns: list_columns, - struct_type_columns: struct_columns, - dependency_indices, - schema, + Ok(LogicalPlan::Unnest(Unnest::try_new( + Arc::new(input), + columns_to_unnest, options, - })) + )?)) } #[cfg(test)] @@ -2534,20 +2381,24 @@ mod tests { .project(vec![col("id"), col("first_name").alias("id")]); match plan { - Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - field: - Column { - relation: Some(TableReference::Bare { table }), - name, - spans: _, - }, - }, - _, - )) => { - assert_eq!(*"employee_csv", *table); - assert_eq!("id", &name); - Ok(()) + Err(DataFusionError::SchemaError(err, _)) => { + if let SchemaError::AmbiguousReference { field } = *err { + let Column { + relation, + name, + spans: _, + } = *field; + let Some(TableReference::Bare { table }) = relation else { + return plan_err!( + "wrong relation: {relation:?}, expected table name" + ); + }; + assert_eq!(*"employee_csv", *table); + assert_eq!("id", &name); + Ok(()) + } else { + plan_err!("Plan should have returned an DataFusionError::SchemaError") + } } _ => plan_err!("Plan should have returned an DataFusionError::SchemaError"), } diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index f1e455f46db30..cc3fbad7b0c22 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -426,6 +426,7 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { file_type, partition_by: _, options, + output_schema: _, }) => { let op_str = options .iter() 
diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index f3c95e696b4b6..369b91e204b99 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -40,6 +40,8 @@ pub struct CopyTo { pub file_type: Arc, /// SQL Options that can affect the formats pub options: HashMap, + /// The schema of the output (a single column "count") + pub output_schema: DFSchemaRef, } impl Debug for CopyTo { @@ -50,6 +52,7 @@ impl Debug for CopyTo { .field("partition_by", &self.partition_by) .field("file_type", &"...") .field("options", &self.options) + .field("output_schema", &self.output_schema) .finish_non_exhaustive() } } @@ -89,6 +92,26 @@ impl Hash for CopyTo { } } +impl CopyTo { + pub fn new( + input: Arc, + output_url: String, + partition_by: Vec, + file_type: Arc, + options: HashMap, + ) -> Self { + Self { + input, + output_url, + partition_by, + file_type, + options, + // The output schema is always a single column "count" with the number of rows copied + output_schema: make_count_schema(), + } + } +} + /// Modifies the content of a database /// /// This operator is used to perform DML operations such as INSERT, DELETE, diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index be5b44098a023..4bbb9d7ada7e9 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -27,8 +27,9 @@ mod statement; pub mod tree_node; pub use builder::{ - build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, - LogicalPlanBuilder, LogicalPlanBuilderOptions, LogicalTableSource, UNNAMED_TABLE, + build_join_schema, requalify_sides_if_needed, table_scan, union, + wrap_projection_for_join_if_necessary, LogicalPlanBuilder, LogicalPlanBuilderOptions, + LogicalTableSource, UNNAMED_TABLE, }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, diff --git 
a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ca431200eff6b..d68e6cd812725 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -43,9 +43,10 @@ use crate::utils::{ grouping_set_expr_count, grouping_set_to_exprlist, split_conjunction, }; use crate::{ - build_join_schema, expr_vec_fmt, BinaryExpr, CreateMemoryTable, CreateView, Execute, - Expr, ExprSchemable, LogicalPlanBuilder, Operator, Prepare, - TableProviderFilterPushDown, TableSource, WindowFunctionDefinition, + build_join_schema, expr_vec_fmt, requalify_sides_if_needed, BinaryExpr, + CreateMemoryTable, CreateView, Execute, Expr, ExprSchemable, LogicalPlanBuilder, + Operator, Prepare, TableProviderFilterPushDown, TableSource, + WindowFunctionDefinition, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -344,7 +345,7 @@ impl LogicalPlan { output_schema } LogicalPlan::Dml(DmlStatement { output_schema, .. }) => output_schema, - LogicalPlan::Copy(CopyTo { input, .. }) => input.schema(), + LogicalPlan::Copy(CopyTo { output_schema, .. }) => output_schema, LogicalPlan::Ddl(ddl) => ddl.schema(), LogicalPlan::Unnest(Unnest { schema, .. }) => schema, LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { @@ -809,16 +810,17 @@ impl LogicalPlan { file_type, options, partition_by, + output_schema: _, }) => { self.assert_no_expressions(expr)?; let input = self.only_input(inputs)?; - Ok(LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: output_url.clone(), - file_type: Arc::clone(file_type), - options: options.clone(), - partition_by: partition_by.clone(), - })) + Ok(LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + output_url.clone(), + partition_by.clone(), + Arc::clone(file_type), + options.clone(), + ))) } LogicalPlan::Values(Values { schema, .. 
}) => { self.assert_no_inputs(inputs)?; @@ -3795,37 +3797,61 @@ impl Join { }) } - /// Create Join with input which wrapped with projection, this method is used to help create physical join. + /// Create Join with input which wrapped with projection, this method is used in physcial planning only to help + /// create the physical join. pub fn try_new_with_project_input( original: &LogicalPlan, left: Arc, right: Arc, column_on: (Vec, Vec), - ) -> Result { + ) -> Result<(Self, bool)> { let original_join = match original { LogicalPlan::Join(join) => join, _ => return plan_err!("Could not create join with project input"), }; + let mut left_sch = LogicalPlanBuilder::from(Arc::clone(&left)); + let mut right_sch = LogicalPlanBuilder::from(Arc::clone(&right)); + + let mut requalified = false; + + // By definition, the resulting schema of an inner/left/right & full join will have first the left side fields and then the right, + // potentially having duplicate field names. Note this will only qualify fields if they have not been qualified before. 
+ if original_join.join_type == JoinType::Inner + || original_join.join_type == JoinType::Left + || original_join.join_type == JoinType::Right + || original_join.join_type == JoinType::Full + { + (left_sch, right_sch, requalified) = + requalify_sides_if_needed(left_sch.clone(), right_sch.clone())?; + } + let on: Vec<(Expr, Expr)> = column_on .0 .into_iter() .zip(column_on.1) .map(|(l, r)| (Expr::Column(l), Expr::Column(r))) .collect(); - let join_schema = - build_join_schema(left.schema(), right.schema(), &original_join.join_type)?; - Ok(Join { - left, - right, - on, - filter: original_join.filter.clone(), - join_type: original_join.join_type, - join_constraint: original_join.join_constraint, - schema: Arc::new(join_schema), - null_equality: original_join.null_equality, - }) + let join_schema = build_join_schema( + left_sch.schema(), + right_sch.schema(), + &original_join.join_type, + )?; + + Ok(( + Join { + left, + right, + on, + filter: original_join.filter.clone(), + join_type: original_join.join_type, + join_constraint: original_join.join_constraint, + schema: Arc::new(join_schema), + null_equality: original_join.null_equality, + }, + requalified, + )) } } @@ -4036,6 +4062,211 @@ impl PartialOrd for Unnest { } } +impl Unnest { + pub fn try_new( + input: Arc, + exec_columns: Vec, + options: UnnestOptions, + ) -> Result { + if exec_columns.is_empty() { + return plan_err!("unnest plan requires at least 1 column to unnest"); + } + + let mut list_columns: Vec<(usize, ColumnUnnestList)> = vec![]; + let mut struct_columns = vec![]; + let indices_to_unnest = exec_columns + .iter() + .map(|c| Ok((input.schema().index_of_column(c)?, c))) + .collect::>>()?; + + let input_schema = input.schema(); + + let mut dependency_indices = vec![]; + // Transform input schema into new schema + // Given this comprehensive example + // + // input schema: + // 1.col1_unnest_placeholder: list[list[int]], + // 2.col1: list[list[int]] + // 3.col2: list[int] + // with unnest on 
unnest(col1,depth=2), unnest(col1,depth=1) and unnest(col2,depth=1) + // output schema: + // 1.unnest_col1_depth_2: int + // 2.unnest_col1_depth_1: list[int] + // 3.col1: list[list[int]] + // 4.unnest_col2_depth_1: int + // Meaning the placeholder column will be replaced by its unnested variation(s), note + // the plural. + let fields = input_schema + .iter() + .enumerate() + .map(|(index, (original_qualifier, original_field))| { + match indices_to_unnest.get(&index) { + Some(column_to_unnest) => { + let recursions_on_column = options + .recursions + .iter() + .filter(|p| -> bool { &p.input_column == *column_to_unnest }) + .collect::>(); + let mut transformed_columns = recursions_on_column + .iter() + .map(|r| { + list_columns.push(( + index, + ColumnUnnestList { + output_column: r.output_column.clone(), + depth: r.depth, + }, + )); + Ok(get_unnested_columns( + &r.output_column.name, + original_field.data_type(), + r.depth, + )? + .into_iter() + .next() + .unwrap()) // because unnesting a list column always result into one result + }) + .collect::)>>>()?; + if transformed_columns.is_empty() { + transformed_columns = get_unnested_columns( + &column_to_unnest.name, + original_field.data_type(), + 1, + )?; + match original_field.data_type() { + DataType::Struct(_) => { + struct_columns.push(index); + } + DataType::List(_) + | DataType::FixedSizeList(_, _) + | DataType::LargeList(_) => { + list_columns.push(( + index, + ColumnUnnestList { + output_column: Column::from_name( + &column_to_unnest.name, + ), + depth: 1, + }, + )); + } + _ => {} + }; + } + + // new columns dependent on the same original index + dependency_indices.extend(std::iter::repeat_n( + index, + transformed_columns.len(), + )); + Ok(transformed_columns + .iter() + .map(|(col, field)| { + (col.relation.to_owned(), field.to_owned()) + }) + .collect()) + } + None => { + dependency_indices.push(index); + Ok(vec![( + original_qualifier.cloned(), + Arc::clone(original_field), + )]) + } + } + }) + 
.collect::>>()? + .into_iter() + .flatten() + .collect::>(); + + let metadata = input_schema.metadata().clone(); + let df_schema = DFSchema::new_with_metadata(fields, metadata)?; + // We can use the existing functional dependencies: + let deps = input_schema.functional_dependencies().clone(); + let schema = Arc::new(df_schema.with_functional_dependencies(deps)?); + + Ok(Unnest { + input, + exec_columns, + list_type_columns: list_columns, + struct_type_columns: struct_columns, + dependency_indices, + schema, + options, + }) + } +} + +// Based on data type, either struct or a variant of list +// return a set of columns as the result of unnesting +// the input columns. +// For example, given a column with name "a", +// - List(Element) returns ["a"] with data type Element +// - Struct(field1, field2) returns ["a.field1","a.field2"] +// For list data type, an argument depth is used to specify +// the recursion level +fn get_unnested_columns( + col_name: &String, + data_type: &DataType, + depth: usize, +) -> Result)>> { + let mut qualified_columns = Vec::with_capacity(1); + + match data_type { + DataType::List(_) | DataType::FixedSizeList(_, _) | DataType::LargeList(_) => { + let data_type = get_unnested_list_datatype_recursive(data_type, depth)?; + let new_field = Arc::new(Field::new( + col_name, data_type, + // Unnesting may produce NULLs even if the list is not null. 
+ // For example: unnest([1], []) -> 1, null + true, + )); + let column = Column::from_name(col_name); + // let column = Column::from((None, &new_field)); + qualified_columns.push((column, new_field)); + } + DataType::Struct(fields) => { + qualified_columns.extend(fields.iter().map(|f| { + let new_name = format!("{}.{}", col_name, f.name()); + let column = Column::from_name(&new_name); + let new_field = f.as_ref().clone().with_name(new_name); + // let column = Column::from((None, &f)); + (column, Arc::new(new_field)) + })) + } + _ => { + return internal_err!( + "trying to unnest on invalid data type {:?}", + data_type + ); + } + }; + Ok(qualified_columns) +} + +// Get the data type of a multi-dimensional type after unnesting it +// with a given depth +fn get_unnested_list_datatype_recursive( + data_type: &DataType, + depth: usize, +) -> Result { + match data_type { + DataType::List(field) + | DataType::FixedSizeList(field, _) + | DataType::LargeList(field) => { + if depth == 1 { + return Ok(field.data_type().clone()); + } + return get_unnested_list_datatype_recursive(field.data_type(), depth - 1); + } + _ => {} + }; + + internal_err!("trying to unnest on invalid data type {:?}", data_type) +} + #[cfg(test)] mod tests { diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index 527248ad39c24..47088370a1d93 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -243,6 +243,7 @@ impl TreeNode for LogicalPlan { partition_by, file_type, options, + output_schema, }) => input.map_elements(f)?.update_data(|input| { LogicalPlan::Copy(CopyTo { input, @@ -250,6 +251,7 @@ impl TreeNode for LogicalPlan { partition_by, file_type, options, + output_schema, }) }), LogicalPlan::Ddl(ddl) => { @@ -313,9 +315,9 @@ impl TreeNode for LogicalPlan { LogicalPlan::Unnest(Unnest { input, exec_columns: input_columns, - dependency_indices, list_type_columns, struct_type_columns, + 
dependency_indices, schema, options, }) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 4c03f919312eb..067c7a94279fe 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -294,7 +294,7 @@ pub struct RawAggregateExpr { pub args: Vec, pub distinct: bool, pub filter: Option>, - pub order_by: Option>, + pub order_by: Vec, pub null_treatment: Option, } diff --git a/datafusion/expr/src/table_source.rs b/datafusion/expr/src/table_source.rs index d6155cfb5dc02..81fec4a1b06a5 100644 --- a/datafusion/expr/src/table_source.rs +++ b/datafusion/expr/src/table_source.rs @@ -32,7 +32,7 @@ use std::{any::Any, borrow::Cow}; /// the filter") are returned. Rows that evaluate to `false` or `NULL` are /// omitted. /// -/// [`TableProvider::scan`]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html#tymethod.scan +/// [`TableProvider::scan`]: https://docs.rs/datafusion/latest/datafusion/datasource/trait.TableProvider.html#tymethod.scan #[derive(Debug, Clone, PartialEq, Eq)] pub enum TableProviderFilterPushDown { /// The filter cannot be used by the provider and will not be pushed down. @@ -89,7 +89,7 @@ impl std::fmt::Display for TableType { /// plan code be dependent on the DataFusion execution engine. Some projects use /// DataFusion's logical plans and have their own execution engine. 
/// -/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html +/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/datasource/trait.TableProvider.html /// [`DefaultTableSource`]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html pub trait TableSource: Sync + Send { fn as_any(&self) -> &dyn Any; diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index f310f31be3522..8f8e84c0d111d 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -60,7 +60,7 @@ pub fn sum(expr: Expr) -> Expr { vec![expr], false, None, - None, + vec![], None, )) } @@ -73,7 +73,7 @@ pub fn count(expr: Expr) -> Expr { vec![expr], false, None, - None, + vec![], None, )) } @@ -86,7 +86,7 @@ pub fn avg(expr: Expr) -> Expr { vec![expr], false, None, - None, + vec![], None, )) } @@ -282,7 +282,7 @@ pub fn min(expr: Expr) -> Expr { vec![expr], false, None, - None, + vec![], None, )) } @@ -363,7 +363,7 @@ pub fn max(expr: Expr) -> Expr { vec![expr], false, None, - None, + vec![], None, )) } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index bc617097f0011..b6c8eb627c775 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -158,7 +158,7 @@ impl AggregateUDF { args, false, None, - None, + vec![], None, )) } @@ -394,7 +394,7 @@ where /// fn get_doc() -> &'static Documentation { /// &DOCUMENTATION /// } -/// +/// /// /// Implement the AggregateUDFImpl trait for GeoMeanUdf /// impl AggregateUDFImpl for GeoMeanUdf { /// fn as_any(&self) -> &dyn Any { self } @@ -415,7 +415,7 @@ where /// ]) /// } /// fn documentation(&self) -> Option<&Documentation> { -/// Some(get_doc()) +/// Some(get_doc()) /// } /// } /// @@ -482,7 +482,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { schema_name.write_fmt(format_args!(" FILTER (WHERE 
{filter})"))?; }; - if let Some(order_by) = order_by { + if !order_by.is_empty() { let clause = match self.is_ordered_set_aggregate() { true => "WITHIN GROUP", false => "ORDER BY", @@ -527,7 +527,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { schema_name.write_fmt(format_args!(" FILTER (WHERE {filter})"))?; }; - if let Some(order_by) = order_by { + if !order_by.is_empty() { schema_name.write_fmt(format_args!( " ORDER BY [{}]", schema_name_from_sorts(order_by)? @@ -616,10 +616,11 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { if let Some(fe) = filter { display_name.write_fmt(format_args!(" FILTER (WHERE {fe})"))?; } - if let Some(ob) = order_by { + if !order_by.is_empty() { display_name.write_fmt(format_args!( " ORDER BY [{}]", - ob.iter() + order_by + .iter() .map(|o| format!("{o}")) .collect::>() .join(", ") @@ -898,26 +899,35 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Return true if this aggregate UDF is equal to the other. /// /// Allows customizing the equality of aggregate UDFs. + /// *Must* be implemented explicitly if the UDF type has internal state. /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: /// /// - reflexive: `a.equals(a)`; /// - symmetric: `a.equals(b)` implies `b.equals(a)`; /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. /// - /// By default, compares [`Self::name`] and [`Self::signature`]. + /// By default, compares type, [`Self::name`], [`Self::aliases`] and [`Self::signature`]. fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.as_any().type_id() == other.as_any().type_id() + && self.name() == other.name() + && self.aliases() == other.aliases() + && self.signature() == other.signature() } /// Returns a hash value for this aggregate UDF. /// - /// Allows customizing the hash code of aggregate UDFs. 
Similarly to [`Hash`] and [`Eq`], - /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. + /// Allows customizing the hash code of aggregate UDFs. + /// *Must* be implemented explicitly whenever [`Self::equals`] is implemented. + /// + /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, + /// their `hash_value`s must be the same. /// - /// By default, hashes [`Self::name`] and [`Self::signature`]. + /// By default, it is consistent with default implementation of [`Self::equals`]. fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); + self.as_any().type_id().hash(hasher); self.name().hash(hasher); + self.aliases().hash(hasher); self.signature().hash(hasher); hasher.finish() } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 81865a836d2cf..0266dd0550b09 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -21,8 +21,9 @@ use crate::async_udf::AsyncScalarUDF; use crate::expr::schema_name_from_exprs_comma_separated_without_space; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; -use crate::{ColumnarValue, Documentation, Expr, Signature}; +use crate::{udf_equals_hash, ColumnarValue, Documentation, Expr, Signature}; use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue}; use datafusion_expr_common::interval_arithmetic::Interval; use std::any::Any; @@ -311,6 +312,8 @@ pub struct ScalarFunctionArgs { /// or `return_field_from_args`) when creating the physical expression /// from the logical expression pub return_field: FieldRef, + /// The config options at execution time + pub config_options: Arc, } impl ScalarFunctionArgs { @@ -697,26 +700,35 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Return true if this scalar UDF is equal to the other. 
/// /// Allows customizing the equality of scalar UDFs. + /// *Must* be implemented explicitly if the UDF type has internal state. /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: /// /// - reflexive: `a.equals(a)`; /// - symmetric: `a.equals(b)` implies `b.equals(a)`; /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. /// - /// By default, compares [`Self::name`] and [`Self::signature`]. + /// By default, compares type, [`Self::name`], [`Self::aliases`] and [`Self::signature`]. fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.as_any().type_id() == other.as_any().type_id() + && self.name() == other.name() + && self.aliases() == other.aliases() + && self.signature() == other.signature() } /// Returns a hash value for this scalar UDF. /// - /// Allows customizing the hash code of scalar UDFs. Similarly to [`Hash`] and [`Eq`], - /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. + /// Allows customizing the hash code of scalar UDFs. + /// *Must* be implemented explicitly whenever [`Self::equals`] is implemented. /// - /// By default, hashes [`Self::name`] and [`Self::signature`]. + /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, + /// their `hash_value`s must be the same. + /// + /// By default, it is consistent with default implementation of [`Self::equals`]. 
fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); + self.as_any().type_id().hash(hasher); self.name().hash(hasher); + self.aliases().hash(hasher); self.signature().hash(hasher); hasher.finish() } @@ -738,6 +750,21 @@ struct AliasedScalarUDFImpl { aliases: Vec, } +impl PartialEq for AliasedScalarUDFImpl { + fn eq(&self, other: &Self) -> bool { + let Self { inner, aliases } = self; + inner.equals(other.inner.as_ref()) && aliases == &other.aliases + } +} + +impl Hash for AliasedScalarUDFImpl { + fn hash(&self, state: &mut H) { + let Self { inner, aliases } = self; + inner.hash_value().hash(state); + aliases.hash(state); + } +} + impl AliasedScalarUDFImpl { pub fn new( inner: Arc, @@ -822,20 +849,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.coerce_types(arg_types) } - fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { - if let Some(other) = other.as_any().downcast_ref::() { - self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases - } else { - false - } - } - - fn hash_value(&self) -> u64 { - let hasher = &mut DefaultHasher::new(); - self.inner.hash_value().hash(hasher); - self.aliases.hash(hasher); - hasher.finish() - } + udf_equals_hash!(ScalarUDFImpl); fn documentation(&self) -> Option<&Documentation> { self.inner.documentation() diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index f5ab78e0c12a0..b19a083beffd2 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -362,26 +362,35 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// Return true if this window UDF is equal to the other. /// /// Allows customizing the equality of window UDFs. + /// *Must* be implemented explicitly if the UDF type has internal state. /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]: /// /// - reflexive: `a.equals(a)`; /// - symmetric: `a.equals(b)` implies `b.equals(a)`; /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`. 
/// - /// By default, compares [`Self::name`] and [`Self::signature`]. + /// By default, compares type, [`Self::name`], [`Self::aliases`] and [`Self::signature`]. fn equals(&self, other: &dyn WindowUDFImpl) -> bool { - self.name() == other.name() && self.signature() == other.signature() + self.as_any().type_id() == other.as_any().type_id() + && self.name() == other.name() + && self.aliases() == other.aliases() + && self.signature() == other.signature() } /// Returns a hash value for this window UDF. /// - /// Allows customizing the hash code of window UDFs. Similarly to [`Hash`] and [`Eq`], - /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same. + /// Allows customizing the hash code of window UDFs. + /// *Must* be implemented explicitly whenever [`Self::equals`] is implemented. /// - /// By default, hashes [`Self::name`] and [`Self::signature`]. + /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, + /// their `hash_value`s must be the same. + /// + /// By default, it is consistent with default implementation of [`Self::equals`]. fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); + self.as_any().type_id().hash(hasher); self.name().hash(hasher); + self.aliases().hash(hasher); self.signature().hash(hasher); hasher.finish() } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 8950f5e450e03..e55415232897a 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -19,6 +19,7 @@ use std::cmp::Ordering; use std::collections::{BTreeSet, HashSet}; +use std::hash::Hasher; use std::sync::Arc; use crate::expr::{Alias, Sort, WildcardOptions, WindowFunctionParams}; @@ -1260,6 +1261,94 @@ pub fn collect_subquery_cols( }) } +/// Generates implementation of `equals` and `hash_value` methods for a trait, delegating +/// to [`PartialEq`] and [`Hash`] implementations on Self. 
+/// Meant to be used with traits representing user-defined functions (UDFs). +/// +/// Example showing generation of [`ScalarUDFImpl::equals`] and [`ScalarUDFImpl::hash_value`] +/// implementations. +/// +/// ``` +/// # use arrow::datatypes::DataType; +/// # use datafusion_expr::{udf_equals_hash, ScalarFunctionArgs, ScalarUDFImpl}; +/// # use datafusion_expr_common::columnar_value::ColumnarValue; +/// # use datafusion_expr_common::signature::Signature; +/// # use std::any::Any; +/// +/// // Implementing PartialEq & Hash is a prerequisite for using this macro, +/// // but the implementation can be derived. +/// #[derive(Debug, PartialEq, Hash)] +/// struct VarcharToTimestampTz { +/// safe: bool, +/// } +/// +/// impl ScalarUDFImpl for VarcharToTimestampTz { +/// /* other methods omitted for brevity */ +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # +/// # fn name(&self) -> &str { +/// # "varchar_to_timestamp_tz" +/// # } +/// # +/// # fn signature(&self) -> &Signature { +/// # todo!() +/// # } +/// # +/// # fn return_type( +/// # &self, +/// # _arg_types: &[DataType], +/// # ) -> datafusion_common::Result { +/// # todo!() +/// # } +/// # +/// # fn invoke_with_args( +/// # &self, +/// # args: ScalarFunctionArgs, +/// # ) -> datafusion_common::Result { +/// # todo!() +/// # } +/// # +/// udf_equals_hash!(ScalarUDFImpl); +/// } +/// ``` +/// +/// [`ScalarUDFImpl::equals`]: crate::ScalarUDFImpl::equals +/// [`ScalarUDFImpl::hash_value`]: crate::ScalarUDFImpl::hash_value +#[macro_export] +macro_rules! 
udf_equals_hash { + ($udf_type:tt) => { + fn equals(&self, other: &dyn $udf_type) -> bool { + use ::core::any::Any; + use ::core::cmp::PartialEq; + let Some(other) = ::downcast_ref::(other.as_any()) + else { + return false; + }; + PartialEq::eq(self, other) + } + + fn hash_value(&self) -> u64 { + use ::std::any::type_name; + use ::std::hash::{DefaultHasher, Hash, Hasher}; + let hasher = &mut DefaultHasher::new(); + type_name::().hash(hasher); + Hash::hash(self, hasher); + Hasher::finish(hasher) + } + }; +} + +pub fn arc_ptr_eq(a: &Arc, b: &Arc) -> bool { + // Not necessarily equivalent to `Arc::ptr_eq` for fat pointers. + std::ptr::eq(Arc::as_ptr(a), Arc::as_ptr(b)) +} + +pub fn arc_ptr_hash(a: &Arc, hasher: &mut impl Hasher) { + std::ptr::hash(Arc::as_ptr(a), hasher) +} + #[cfg(test)] mod tests { use super::*; @@ -1268,9 +1357,13 @@ mod tests { expr::WindowFunction, expr_vec_fmt, grouping_set, lit, rollup, test::function_stub::{max_udaf, min_udaf, sum_udaf}, - Cast, ExprFunctionExt, WindowFunctionDefinition, + Cast, ExprFunctionExt, ScalarFunctionArgs, ScalarUDFImpl, + WindowFunctionDefinition, }; use arrow::datatypes::{UnionFields, UnionMode}; + use datafusion_expr_common::columnar_value::ColumnarValue; + use datafusion_expr_common::signature::Volatility; + use std::any::Any; #[test] fn test_group_window_expr_by_sort_keys_empty_case() -> Result<()> { @@ -1690,4 +1783,91 @@ mod tests { DataType::List(Arc::new(Field::new("my_union", union_type, true))); assert!(!can_hash(&list_union_type)); } + + #[test] + fn test_udf_equals_hash() { + #[derive(Debug, PartialEq, Hash)] + struct StatefulFunctionWithEqHash { + signature: Signature, + state: bool, + } + impl ScalarUDFImpl for StatefulFunctionWithEqHash { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "StatefulFunctionWithEqHash" + } + fn signature(&self) -> &Signature { + &self.signature + } + fn return_type(&self, _arg_types: &[DataType]) -> Result { + todo!() + } + fn invoke_with_args( + 
&self, + _args: ScalarFunctionArgs, + ) -> Result { + todo!() + } + } + + #[derive(Debug, PartialEq, Hash)] + struct StatefulFunctionWithEqHashWithUdfEqualsHash { + signature: Signature, + state: bool, + } + impl ScalarUDFImpl for StatefulFunctionWithEqHashWithUdfEqualsHash { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "StatefulFunctionWithEqHashWithUdfEqualsHash" + } + fn signature(&self) -> &Signature { + &self.signature + } + fn return_type(&self, _arg_types: &[DataType]) -> Result { + todo!() + } + fn invoke_with_args( + &self, + _args: ScalarFunctionArgs, + ) -> Result { + todo!() + } + udf_equals_hash!(ScalarUDFImpl); + } + + let signature = Signature::exact(vec![DataType::Utf8], Volatility::Immutable); + + // Sadly, without `udf_equals_hash!` macro, the equals and hash_value ignore state fields, + // even though the struct implements `PartialEq` and `Hash`. + let a: Box = Box::new(StatefulFunctionWithEqHash { + signature: signature.clone(), + state: true, + }); + let b: Box = Box::new(StatefulFunctionWithEqHash { + signature: signature.clone(), + state: false, + }); + assert!(a.equals(b.as_ref())); + assert_eq!(a.hash_value(), b.hash_value()); + + // With udf_equals_hash! macro, the equals and hash_value compare the state. + // even though the struct implements `PartialEq` and `Hash`. 
+ let a: Box = + Box::new(StatefulFunctionWithEqHashWithUdfEqualsHash { + signature: signature.clone(), + state: true, + }); + let b: Box = + Box::new(StatefulFunctionWithEqHashWithUdfEqualsHash { + signature: signature.clone(), + state: false, + }); + assert!(!a.equals(b.as_ref())); + // This could be true, but it's very unlikely that boolean true and false hash the same + assert_ne!(a.hash_value(), b.hash_value()); + } } diff --git a/datafusion/expr/src/window_state.rs b/datafusion/expr/src/window_state.rs index a101b8fe4df63..014bed5aea562 100644 --- a/datafusion/expr/src/window_state.rs +++ b/datafusion/expr/src/window_state.rs @@ -34,7 +34,7 @@ use datafusion_common::{ }; /// Holds the state of evaluating a window function -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct WindowAggState { /// The range that we calculate the window function pub window_frame_range: Range, @@ -112,7 +112,7 @@ impl WindowAggState { } /// This object stores the window frame state for use in incremental calculations. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum WindowFrameContext { /// ROWS frames are inherently stateless. Rows(Arc), @@ -240,7 +240,7 @@ impl WindowFrameContext { } /// State for each unique partition determined according to PARTITION BY column(s) -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub struct PartitionBatchState { /// The record batch belonging to current partition pub record_batch: RecordBatch, @@ -282,7 +282,7 @@ impl PartitionBatchState { /// ranges of data while processing RANGE frames. /// Attribute `sort_options` stores the column ordering specified by the ORDER /// BY clause. This information is used to calculate the range. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct WindowFrameStateRange { sort_options: Vec, } @@ -454,7 +454,7 @@ impl WindowFrameStateRange { /// This structure encapsulates all the state information we require as we /// scan groups of data while processing window frames. 
-#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct WindowFrameStateGroups { /// A tuple containing group values and the row index where the group ends. /// Example: [[1, 1], [1, 1], [2, 1], [2, 1], ...] would correspond to diff --git a/datafusion/ffi/src/execution_plan.rs b/datafusion/ffi/src/execution_plan.rs index 14a0908c47954..70c957d8c3733 100644 --- a/datafusion/ffi/src/execution_plan.rs +++ b/datafusion/ffi/src/execution_plan.rs @@ -205,7 +205,8 @@ impl DisplayAs for ForeignExecutionPlan { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!( f, - "FFI_ExecutionPlan(number_of_children={})", + "FFI_ExecutionPlan: {}, number_of_children={}", + self.name, self.children.len(), ) } @@ -390,7 +391,10 @@ mod tests { ); let buf = display.one_line().to_string(); - assert_eq!(buf.trim(), "FFI_ExecutionPlan(number_of_children=0)"); + assert_eq!( + buf.trim(), + "FFI_ExecutionPlan: empty-exec, number_of_children=0" + ); Ok(()) } diff --git a/datafusion/ffi/src/udaf/accumulator_args.rs b/datafusion/ffi/src/udaf/accumulator_args.rs index 874a2ac8b82eb..2cd2fa5f51035 100644 --- a/datafusion/ffi/src/udaf/accumulator_args.rs +++ b/datafusion/ffi/src/udaf/accumulator_args.rs @@ -75,6 +75,7 @@ impl TryFrom> for FFI_AccumulatorArgs { ignore_nulls: args.ignore_nulls, fun_definition: None, aggregate_function: None, + human_display: args.name.to_string(), }; let physical_expr_def = physical_expr_def.encode_to_vec().into(); diff --git a/datafusion/ffi/src/udaf/mod.rs b/datafusion/ffi/src/udaf/mod.rs index eb7a408ab1788..63d44110a657d 100644 --- a/datafusion/ffi/src/udaf/mod.rs +++ b/datafusion/ffi/src/udaf/mod.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::{ffi::c_void, sync::Arc}; - use abi_stable::{ std_types::{ROption, RResult, RStr, RString, RVec}, StableAbi, @@ -41,6 +39,8 @@ use datafusion::{ }; use datafusion_proto_common::from_proto::parse_proto_fields_to_fields; use groups_accumulator::{FFI_GroupsAccumulator, ForeignGroupsAccumulator}; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::{ffi::c_void, sync::Arc}; use crate::util::{rvec_wrapped_to_vec_fieldref, vec_fieldref_to_rvec_wrapped}; use crate::{ @@ -553,6 +553,34 @@ impl AggregateUDFImpl for ForeignAggregateUDF { Ok(rvec_wrapped_to_vec_datatype(&result_types)?) } } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + aliases, + udaf, + } = self; + signature == &other.signature + && aliases == &other.aliases + && std::ptr::eq(udaf, &other.udaf) + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + aliases, + udaf, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + aliases.hash(&mut hasher); + std::ptr::hash(udaf, &mut hasher); + hasher.finish() + } } #[repr(C)] diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index 303acc783b2e4..e1276cc6d647d 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -32,7 +32,8 @@ use arrow::{ ffi::{from_ffi, to_ffi, FFI_ArrowSchema}, }; use arrow_schema::FieldRef; -use datafusion::logical_expr::ReturnFieldArgs; +use datafusion::config::ConfigOptions; +use datafusion::logical_expr::{udf_equals_hash, ReturnFieldArgs}; use datafusion::{ error::DataFusionError, logical_expr::type_coercion::functions::data_types_with_scalar_udf, @@ -46,6 +47,7 @@ use datafusion::{ use return_type_args::{ FFI_ReturnFieldArgs, ForeignReturnFieldArgs, ForeignReturnFieldArgsOwned, }; +use std::hash::{Hash, Hasher}; use std::{ffi::c_void, sync::Arc}; pub mod return_type_args; 
@@ -206,6 +208,8 @@ unsafe extern "C" fn invoke_with_args_fn_wrapper( arg_fields, number_rows, return_field, + // todo - should the config options go through serdes? + config_options: Arc::new(ConfigOptions::default()), }; let result = rresult_return!(udf @@ -286,6 +290,36 @@ pub struct ForeignScalarUDF { unsafe impl Send for ForeignScalarUDF {} unsafe impl Sync for ForeignScalarUDF {} +impl PartialEq for ForeignScalarUDF { + fn eq(&self, other: &Self) -> bool { + let Self { + name, + aliases, + udf, + signature, + } = self; + name == &other.name + && aliases == &other.aliases + && std::ptr::eq(udf, &other.udf) + && signature == &other.signature + } +} + +impl Hash for ForeignScalarUDF { + fn hash(&self, state: &mut H) { + let Self { + name, + aliases, + udf, + signature, + } = self; + name.hash(state); + aliases.hash(state); + std::ptr::hash(udf, state); + signature.hash(state); + } +} + impl TryFrom<&FFI_ScalarUDF> for ForeignScalarUDF { type Error = DataFusionError; @@ -347,6 +381,8 @@ impl ScalarUDFImpl for ForeignScalarUDF { arg_fields, number_rows, return_field, + // todo - should the config options go through serdes? + config_options: _config_options, } = invoke_args; let args = args @@ -407,6 +443,8 @@ impl ScalarUDFImpl for ForeignScalarUDF { Ok(rvec_wrapped_to_vec_datatype(&result_types)?) } } + + udf_equals_hash!(ScalarUDFImpl); } #[cfg(test)] diff --git a/datafusion/ffi/src/udwf/mod.rs b/datafusion/ffi/src/udwf/mod.rs index 504bf7a411f1a..b0b769219d283 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::{ffi::c_void, sync::Arc}; - use abi_stable::{ std_types::{ROption, RResult, RString, RVec}, StableAbi, @@ -42,6 +40,8 @@ use partition_evaluator::{FFI_PartitionEvaluator, ForeignPartitionEvaluator}; use partition_evaluator_args::{ FFI_PartitionEvaluatorArgs, ForeignPartitionEvaluatorArgs, }; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::{ffi::c_void, sync::Arc}; mod partition_evaluator; mod partition_evaluator_args; mod range; @@ -334,6 +334,38 @@ impl WindowUDFImpl for ForeignWindowUDF { let options: Option<&FFI_SortOptions> = self.udf.sort_options.as_ref().into(); options.map(|s| s.into()) } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + aliases, + udf, + signature, + } = self; + name == &other.name + && aliases == &other.aliases + && std::ptr::eq(udf, &other.udf) + && signature == &other.signature + } + + fn hash_value(&self) -> u64 { + let Self { + name, + aliases, + udf, + signature, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + aliases.hash(&mut hasher); + std::ptr::hash(udf, &mut hasher); + signature.hash(&mut hasher); + hasher.finish() + } } #[repr(C)] diff --git a/datafusion/functions-aggregate-common/src/min_max.rs b/datafusion/functions-aggregate-common/src/min_max.rs index aa37abd618557..b02001753215f 100644 --- a/datafusion/functions-aggregate-common/src/min_max.rs +++ b/datafusion/functions-aggregate-common/src/min_max.rs @@ -20,11 +20,11 @@ use arrow::array::{ ArrayRef, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, DurationMicrosecondArray, - DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - IntervalDayTimeArray, IntervalMonthDayNanoArray, 
IntervalYearMonthArray, - LargeBinaryArray, LargeStringArray, StringArray, StringViewArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, + DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, + FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, Int16Array, + Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, + IntervalYearMonthArray, LargeBinaryArray, LargeStringArray, StringArray, + StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, @@ -254,6 +254,12 @@ pub fn min_batch(values: &ArrayRef) -> Result { min_binary ) } + DataType::FixedSizeBinary(size) => { + let array = downcast_value!(&values, FixedSizeBinaryArray); + let value = compute::min_fixed_size_binary(array); + let value = value.map(|e| e.to_vec()); + ScalarValue::FixedSizeBinary(*size, value) + } DataType::BinaryView => { typed_min_max_batch_binary!( &values, @@ -339,6 +345,12 @@ pub fn max_batch(values: &ArrayRef) -> Result { max_binary ) } + DataType::FixedSizeBinary(size) => { + let array = downcast_value!(&values, FixedSizeBinaryArray); + let value = compute::max_fixed_size_binary(array); + let value = value.map(|e| e.to_vec()); + ScalarValue::FixedSizeBinary(*size, value) + } DataType::Struct(_) => min_max_batch_generic(values, Ordering::Less)?, DataType::List(_) => min_max_batch_generic(values, Ordering::Less)?, DataType::LargeList(_) => min_max_batch_generic(values, Ordering::Less)?, diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 9b0d62e936bce..55c8c847ad0a4 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -69,7 +69,7 
@@ pub fn approx_percentile_cont( args, false, None, - Some(vec![order_by]), + vec![order_by], None, )) } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 5180d45889620..ab847e8388691 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -17,6 +17,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::size_of_val; use std::sync::Arc; @@ -186,6 +187,30 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + approx_percentile_cont, + } = self; + signature == &other.signature + && approx_percentile_cont.equals(&other.approx_percentile_cont) + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + approx_percentile_cont, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + hasher.write_u64(approx_percentile_cont.hash_value()); + hasher.finish() + } } #[derive(Debug)] diff --git a/datafusion/functions-aggregate/src/bit_and_or_xor.rs b/datafusion/functions-aggregate/src/bit_and_or_xor.rs index 4512162ba5d33..8ca5d992a7fea 100644 --- a/datafusion/functions-aggregate/src/bit_and_or_xor.rs +++ b/datafusion/functions-aggregate/src/bit_and_or_xor.rs @@ -20,6 +20,7 @@ use std::any::Any; use std::collections::HashSet; use std::fmt::{Display, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::{size_of, size_of_val}; use ahash::RandomState; @@ -196,7 +197,7 @@ make_bitwise_udaf_expr_and_func!( ); /// The different types of bitwise operations that 
can be performed. -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] enum BitwiseOperationType { And, Or, @@ -312,6 +313,38 @@ impl AggregateUDFImpl for BitwiseOperation { fn documentation(&self) -> Option<&Documentation> { Some(self.documentation) } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + operation, + func_name, + documentation, + } = self; + signature == &other.signature + && operation == &other.operation + && func_name == &other.func_name + && documentation == &other.documentation + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + operation, + func_name, + documentation, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + operation.hash(&mut hasher); + func_name.hash(&mut hasher); + documentation.hash(&mut hasher); + hasher.finish() + } } struct BitAndAccumulator { @@ -478,7 +511,7 @@ impl Default for DistinctBitXorAccumulator { impl Accumulator for DistinctBitXorAccumulator where - T::Native: std::ops::BitXor + std::hash::Hash + Eq, + T::Native: std::ops::BitXor + Hash + Eq, { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { if values.is_empty() { diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index 0a7345245ca8c..2f23e56f7bc14 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -26,7 +26,7 @@ use arrow::array::{ downcast_array, Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder, UInt64Array, }; -use arrow::compute::{and, filter, is_not_null, kernels::cast}; +use arrow::compute::{and, filter, is_not_null}; use arrow::datatypes::{FieldRef, Float64Type, UInt64Type}; use arrow::{ array::ArrayRef, @@ -38,10 +38,9 @@ use log::debug; use 
crate::covariance::CovarianceAccumulator; use crate::stddev::StddevAccumulator; -use datafusion_common::{plan_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ function::{AccumulatorArgs, StateFieldsArgs}, - type_coercion::aggregates::NUMERICS, utils::format_state_name, Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, }; @@ -83,10 +82,13 @@ impl Default for Correlation { } impl Correlation { - /// Create a new COVAR_POP aggregate function + /// Create a new CORR aggregate function pub fn new() -> Self { Self { - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::exact( + vec![DataType::Float64, DataType::Float64], + Volatility::Immutable, + ), } } } @@ -105,11 +107,7 @@ impl AggregateUDFImpl for Correlation { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - if !arg_types[0].is_numeric() { - return plan_err!("Correlation requires numeric input types"); - } - + fn return_type(&self, _arg_types: &[DataType]) -> Result { Ok(DataType::Float64) } @@ -375,10 +373,8 @@ impl GroupsAccumulator for CorrelationGroupsAccumulator { self.sum_xx.resize(total_num_groups, 0.0); self.sum_yy.resize(total_num_groups, 0.0); - let array_x = &cast(&values[0], &DataType::Float64)?; - let array_x = downcast_array::(array_x); - let array_y = &cast(&values[1], &DataType::Float64)?; - let array_y = downcast_array::(array_y); + let array_x = downcast_array::(&values[0]); + let array_y = downcast_array::(&values[1]); accumulate_multiple( group_indices, diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index d1fe410321f62..09904bbad6ec5 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -73,7 +73,7 @@ pub fn count_distinct(expr: Expr) -> Expr { vec![expr], true, None, - None, + vec![], None, )) } diff --git 
a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index 790eaada6a095..0856237d08cb5 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::size_of_val; use std::sync::Arc; @@ -55,31 +56,23 @@ create_func!(FirstValue, first_value_udaf); create_func!(LastValue, last_value_udaf); /// Returns the first value in a group of values. -pub fn first_value(expression: Expr, order_by: Option>) -> Expr { - if let Some(order_by) = order_by { - first_value_udaf() - .call(vec![expression]) - .order_by(order_by) - .build() - // guaranteed to be `Expr::AggregateFunction` - .unwrap() - } else { - first_value_udaf().call(vec![expression]) - } +pub fn first_value(expression: Expr, order_by: Vec) -> Expr { + first_value_udaf() + .call(vec![expression]) + .order_by(order_by) + .build() + // guaranteed to be `Expr::AggregateFunction` + .unwrap() } /// Returns the last value in a group of values. 
-pub fn last_value(expression: Expr, order_by: Option>) -> Expr { - if let Some(order_by) = order_by { - last_value_udaf() - .call(vec![expression]) - .order_by(order_by) - .build() - // guaranteed to be `Expr::AggregateFunction` - .unwrap() - } else { - last_value_udaf().call(vec![expression]) - } +pub fn last_value(expression: Expr, order_by: Vec) -> Expr { + last_value_udaf() + .call(vec![expression]) + .order_by(order_by) + .build() + // guaranteed to be `Expr::AggregateFunction` + .unwrap() } #[user_doc( @@ -300,6 +293,30 @@ impl AggregateUDFImpl for FirstValue { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + is_input_pre_ordered, + } = self; + signature == &other.signature + && is_input_pre_ordered == &other.is_input_pre_ordered + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + is_input_pre_ordered, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + is_input_pre_ordered.hash(&mut hasher); + hasher.finish() + } } // TODO: rename to PrimitiveGroupsAccumulator @@ -1220,6 +1237,30 @@ impl AggregateUDFImpl for LastValue { } } } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + is_input_pre_ordered, + } = self; + signature == &other.signature + && is_input_pre_ordered == &other.is_input_pre_ordered + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + is_input_pre_ordered, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + is_input_pre_ordered.hash(&mut hasher); + hasher.finish() + } } /// This accumulator is used when there is no ordering specified for the diff --git 
a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs index 18f27c3c4ae3b..6c6bf72838899 100644 --- a/datafusion/functions-aggregate/src/macros.rs +++ b/datafusion/functions-aggregate/src/macros.rs @@ -28,7 +28,7 @@ macro_rules! make_udaf_expr { vec![$($arg),*], false, None, - None, + vec![], None, )) } @@ -52,7 +52,7 @@ macro_rules! make_udaf_expr_and_func { args, false, None, - None, + vec![], None, )) } diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 0bd36a14be76c..df92e047680b4 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -443,6 +443,21 @@ macro_rules! typed_min_max_string { }}; } +// min/max of two scalar string values with a prefix argument. +macro_rules! typed_min_max_string_arg { + ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident, $ARG:expr) => {{ + ScalarValue::$SCALAR( + $ARG, + match ($VALUE, $DELTA) { + (None, None) => None, + (Some(a), None) => Some(a.clone()), + (None, Some(b)) => Some(b.clone()), + (Some(a), Some(b)) => Some((a).$OP(b).clone()), + }, + ) + }}; +} + macro_rules! choose_min_max { (min) => { std::cmp::Ordering::Greater @@ -546,6 +561,16 @@ macro_rules! 
min_max { (ScalarValue::LargeBinary(lhs), ScalarValue::LargeBinary(rhs)) => { typed_min_max_string!(lhs, rhs, LargeBinary, $OP) } + (ScalarValue::FixedSizeBinary(lsize, lhs), ScalarValue::FixedSizeBinary(rsize, rhs)) => { + if lsize == rsize { + typed_min_max_string_arg!(lhs, rhs, FixedSizeBinary, $OP, *lsize) + } + else { + return internal_err!( + "MIN/MAX is not expected to receive FixedSizeBinary of incompatible sizes {:?}", + (lsize, rsize)) + } + } (ScalarValue::BinaryView(lhs), ScalarValue::BinaryView(rhs)) => { typed_min_max_string!(lhs, rhs, BinaryView, $OP) } diff --git a/datafusion/functions-aggregate/src/regr.rs b/datafusion/functions-aggregate/src/regr.rs index 0f84aa1323f52..4600f6570fac8 100644 --- a/datafusion/functions-aggregate/src/regr.rs +++ b/datafusion/functions-aggregate/src/regr.rs @@ -38,6 +38,7 @@ use datafusion_expr::{ }; use std::any::Any; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::size_of_val; use std::sync::{Arc, LazyLock}; @@ -320,6 +321,34 @@ impl AggregateUDFImpl for Regr { fn documentation(&self) -> Option<&Documentation> { self.regr_type.documentation() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + regr_type, + func_name, + } = self; + signature == &other.signature + && regr_type == &other.regr_type + && func_name == &other.func_name + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + regr_type, + func_name, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + regr_type.hash(&mut hasher); + func_name.hash(&mut hasher); + hasher.finish() + } } /// `RegrAccumulator` is used to compute linear regression aggregate functions diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index bf6d21a808e78..2f9f1cac84d49 100644 --- 
a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::align_of_val; use std::sync::Arc; @@ -153,6 +154,23 @@ impl AggregateUDFImpl for Stddev { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { signature, alias } = self; + signature == &other.signature && alias == &other.alias + } + + fn hash_value(&self) -> u64 { + let Self { signature, alias } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + alias.hash(&mut hasher); + hasher.finish() + } } make_udaf_expr_and_func!( diff --git a/datafusion/functions-aggregate/src/string_agg.rs b/datafusion/functions-aggregate/src/string_agg.rs index 09199e19cffc8..56c5ee1aaa676 100644 --- a/datafusion/functions-aggregate/src/string_agg.rs +++ b/datafusion/functions-aggregate/src/string_agg.rs @@ -18,6 +18,7 @@ //! 
[`StringAgg`] accumulator for the `string_agg` function use std::any::Any; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::mem::size_of_val; use crate::array_agg::ArrayAgg; @@ -180,6 +181,29 @@ impl AggregateUDFImpl for StringAgg { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + signature, + array_agg, + } = self; + signature == &other.signature && array_agg.equals(&other.array_agg) + } + + fn hash_value(&self) -> u64 { + let Self { + signature, + array_agg, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + hasher.write_u64(array_agg.hash_value()); + hasher.finish() + } } #[derive(Debug)] diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 55dd7ad144605..ca12dde1f5c39 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -21,16 +21,16 @@ use arrow::array::{Int32Array, ListArray, StringArray}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::prelude::ThreadRng; -use rand::Rng; -use std::collections::HashSet; -use std::sync::Arc; - +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs}; use datafusion_functions_nested::map::map_udf; use datafusion_functions_nested::planner::NestedFunctionPlanner; +use rand::prelude::ThreadRng; +use rand::Rng; +use std::collections::HashSet; +use std::sync::Arc; fn keys(rng: &mut ThreadRng) -> Vec { let mut keys = HashSet::with_capacity(1000); @@ -105,6 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) { 
Field::new("a", values.data_type(), true).into(), ]; let return_field = Field::new("f", return_type, true).into(); + let config_options = Arc::new(ConfigOptions::default()); b.iter(|| { black_box( @@ -114,6 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: 1, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("map should work on valid values"), ); diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index e2a755371ebc8..05bbd1e114796 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -35,6 +35,7 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::ops::{Neg, Range}; use std::sync::{Arc, LazyLock}; @@ -93,7 +94,7 @@ pub fn lead( lead_udwf().call(vec![arg, shift_offset_lit, default_lit]) } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] enum WindowShiftKind { Lag, Lead, @@ -298,6 +299,23 @@ impl WindowUDFImpl for WindowShift { WindowShiftKind::Lead => Some(get_lead_doc()), } } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { signature, kind } = self; + signature == &other.signature && kind == &other.kind + } + + fn hash_value(&self) -> u64 { + let Self { signature, kind } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + kind.hash(&mut hasher); + hasher.finish() + } } /// When `lead`/`lag` is evaluated on a `NULL` expression we attempt to diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 0b83e1ff9f084..4c7dd995ec8c2 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ 
b/datafusion/functions-window/src/nth_value.rs @@ -35,6 +35,7 @@ use field::WindowUDFFieldArgs; use std::any::Any; use std::cmp::Ordering; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::ops::Range; use std::sync::LazyLock; @@ -76,7 +77,7 @@ pub fn nth_value(arg: datafusion_expr::Expr, n: i64) -> datafusion_expr::Expr { } /// Tag to differentiate special use cases of the NTH_VALUE built-in window function. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum NthValueKind { First, Last, @@ -335,6 +336,23 @@ impl WindowUDFImpl for NthValue { NthValueKind::Nth => Some(get_nth_value_doc()), } } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { signature, kind } = self; + signature == &other.signature && kind == &other.kind + } + + fn hash_value(&self) -> u64 { + let Self { signature, kind } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + kind.hash(&mut hasher); + hasher.finish() + } } #[derive(Debug, Clone)] diff --git a/datafusion/functions-window/src/rank.rs b/datafusion/functions-window/src/rank.rs index 969a957cddd9c..5099b25e1d1fb 100644 --- a/datafusion/functions-window/src/rank.rs +++ b/datafusion/functions-window/src/rank.rs @@ -36,6 +36,7 @@ use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use field::WindowUDFFieldArgs; use std::any::Any; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; use std::iter; use std::ops::Range; use std::sync::{Arc, LazyLock}; @@ -95,7 +96,7 @@ impl Rank { } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum RankType { Basic, Dense, @@ -242,6 +243,34 @@ impl WindowUDFImpl for Rank { RankType::Percent => Some(get_percent_rank_doc()), } } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let 
Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { + name, + signature, + rank_type, + } = self; + name == &other.name + && signature == &other.signature + && rank_type == &other.rank_type + } + + fn hash_value(&self) -> u64 { + let Self { + name, + signature, + rank_type, + } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + name.hash(&mut hasher); + signature.hash(&mut hasher); + rank_type.hash(&mut hasher); + hasher.finish() + } } /// State for the RANK(rank) built-in window function. diff --git a/datafusion/functions/benches/ascii.rs b/datafusion/functions/benches/ascii.rs index 1c7023f4497e6..55471817d2778 100644 --- a/datafusion/functions/benches/ascii.rs +++ b/datafusion/functions/benches/ascii.rs @@ -20,6 +20,7 @@ mod helper; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; use std::sync::Arc; @@ -46,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields = vec![Field::new("a", args_string_ascii[0].data_type(), true).into()]; let return_field = Field::new("f", DataType::Utf8, true).into(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function( format!("ascii/string_ascii_only (null_density={null_density})").as_str(), @@ -56,6 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: N_ROWS, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -76,6 +79,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: N_ROWS, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -102,6 +106,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: N_ROWS, return_field: 
Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -122,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: N_ROWS, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index b4a9e917f4160..edb61c013e242 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -19,6 +19,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; use std::sync::Arc; @@ -30,6 +31,7 @@ fn criterion_benchmark(c: &mut Criterion) { let character_length = datafusion_functions::unicode::character_length(); let return_field = Arc::new(Field::new("f", DataType::Utf8, true)); + let config_options = Arc::new(ConfigOptions::default()); let n_rows = 8192; for str_len in [8, 32, 128, 4096] { @@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -74,6 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -97,6 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -120,6 +125,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: 
Arc::clone(&config_options), })) }) }, diff --git a/datafusion/functions/benches/chr.rs b/datafusion/functions/benches/chr.rs index 6a956bb788127..ec3f188f90844 100644 --- a/datafusion/functions/benches/chr.rs +++ b/datafusion/functions/benches/chr.rs @@ -24,6 +24,7 @@ use datafusion_functions::string::chr; use rand::{Rng, SeedableRng}; use arrow::datatypes::{DataType, Field}; +use datafusion_common::config::ConfigOptions; use rand::rngs::StdRng; use std::sync::Arc; @@ -55,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function("chr", |b| { b.iter(|| { @@ -65,6 +67,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index d350c03c497bb..15f9ffbd78025 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -19,6 +19,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Field}; use arrow::util::bench_util::create_string_array_with_len; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string::concat; @@ -44,6 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); let mut group = c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { @@ -56,6 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: 
arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index a32e0d834672c..937d092cc0282 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -26,6 +26,7 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::cot; use arrow::datatypes::{DataType, Field}; +use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -40,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("cot f32 array: {size}"), |b| { b.iter(|| { @@ -50,6 +52,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -75,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index ac766a002576c..ea8705984f386 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; use arrow::datatypes::Field; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use rand::rngs::ThreadRng; -use rand::Rng; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use 
datafusion_functions::datetime::date_bin; +use rand::rngs::ThreadRng; +use rand::Rng; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { let mut seconds = vec![]; @@ -55,6 +55,8 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("a", interval.data_type(), true).into(), Field::new("b", timestamps.data_type(), true).into(), ]; + let config_options = Arc::new(ConfigOptions::default()); + b.iter(|| { black_box( udf.invoke_with_args(ScalarFunctionArgs { @@ -62,6 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("date_bin should work on valid values"), ) diff --git a/datafusion/functions/benches/date_trunc.rs b/datafusion/functions/benches/date_trunc.rs index ad4d0d0fbb796..70d372429b2d0 100644 --- a/datafusion/functions/benches/date_trunc.rs +++ b/datafusion/functions/benches/date_trunc.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; use arrow::datatypes::Field; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use rand::rngs::ThreadRng; -use rand::Rng; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::date_trunc; +use rand::rngs::ThreadRng; +use rand::Rng; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { let mut seconds = vec![]; @@ -60,6 +60,8 @@ fn criterion_benchmark(c: &mut Criterion) { .return_type(&args.iter().map(|arg| arg.data_type()).collect::>()) .unwrap(); let return_field = Arc::new(Field::new("f", return_type, true)); + let config_options = Arc::new(ConfigOptions::default()); + b.iter(|| { black_box( udf.invoke_with_args(ScalarFunctionArgs { @@ -67,6 +69,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: 
Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("date_trunc should work on valid values"), ) diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index 830e0324766f7..dc2529cd9fd76 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -21,12 +21,15 @@ use arrow::array::Array; use arrow::datatypes::{DataType, Field}; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::encoding; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let decode = encoding::decode(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { @@ -40,6 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(); @@ -57,6 +61,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -75,6 +80,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields, number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(); @@ -93,6 +99,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/find_in_set.rs 
b/datafusion/functions/benches/find_in_set.rs index bad540f049e28..df7d7cc09dd23 100644 --- a/datafusion/functions/benches/find_in_set.rs +++ b/datafusion/functions/benches/find_in_set.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use rand::distr::Alphanumeric; @@ -165,6 +166,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::new(ConfigOptions::default()), })) }) }); @@ -182,6 +184,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::new(ConfigOptions::default()), })) }) }); @@ -203,6 +206,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::new(ConfigOptions::default()), })) }) }); @@ -213,6 +217,8 @@ fn criterion_benchmark(c: &mut Criterion) { .map(|arg| Field::new("a", arg.data_type().clone(), true).into()) .collect::>(); let return_field = Arc::new(Field::new("f", DataType::Int32, true)); + let config_options = Arc::new(ConfigOptions::default()); + group.bench_function(format!("string_view_len_{str_len}"), |b| { b.iter(|| { black_box(find_in_set.invoke_with_args(ScalarFunctionArgs { @@ -220,6 +226,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }); diff --git a/datafusion/functions/benches/gcd.rs b/datafusion/functions/benches/gcd.rs index f700d31123a9d..913ed523543e0 100644 --- 
a/datafusion/functions/benches/gcd.rs +++ b/datafusion/functions/benches/gcd.rs @@ -23,6 +23,7 @@ use arrow::{ datatypes::DataType, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::gcd; @@ -42,6 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) { let array_a = ColumnarValue::Array(generate_i64_array(n_rows)); let array_b = ColumnarValue::Array(generate_i64_array(n_rows)); let udf = gcd(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function("gcd both array", |b| { b.iter(|| { @@ -54,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: 0, return_field: Field::new("f", DataType::Int64, true).into(), + config_options: Arc::clone(&config_options), }) .expect("date_bin should work on valid values"), ) @@ -74,6 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: 0, return_field: Field::new("f", DataType::Int64, true).into(), + config_options: Arc::clone(&config_options), }) .expect("date_bin should work on valid values"), ) @@ -94,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: 0, return_field: Field::new("f", DataType::Int64, true).into(), + config_options: Arc::clone(&config_options), }) .expect("date_bin should work on valid values"), ) diff --git a/datafusion/functions/benches/initcap.rs b/datafusion/functions/benches/initcap.rs index f89b11dff8fbe..7562e990ca16c 100644 --- a/datafusion/functions/benches/initcap.rs +++ b/datafusion/functions/benches/initcap.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode; use 
std::sync::Arc; @@ -56,6 +57,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function( format!("initcap string view shorter than 12 [size={size}]").as_str(), @@ -66,6 +68,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8View, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -81,6 +84,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8View, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -94,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }); diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 49d0a9e326dd7..f59c7af939ab2 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::isnan; use std::sync::Arc; @@ -39,6 +40,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("isnan f32 array: {size}"), |b| { b.iter(|| { @@ -49,6 +51,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Boolean, true).into(), + 
config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -72,6 +75,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Boolean, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 6d1d34c7a8320..9752a9364b9f3 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::iszero; use std::sync::Arc; @@ -41,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { }) .collect::>(); let return_field = Arc::new(Field::new("f", DataType::Boolean, true)); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("iszero f32 array: {size}"), |b| { b.iter(|| { @@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -77,6 +80,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index cdf1529c108c0..83d437c6caa63 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use 
datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -122,6 +123,8 @@ fn create_args5( fn criterion_benchmark(c: &mut Criterion) { let lower = string::lower(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let args = create_args1(size, 32); let arg_fields = args @@ -140,6 +143,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }); @@ -161,6 +165,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }); @@ -184,6 +189,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -217,6 +223,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }), ); @@ -231,6 +238,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }), ); @@ -246,6 +254,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }), ); diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index 7a44f40a689a4..2712223506b9e 100644 --- a/datafusion/functions/benches/ltrim.rs +++ 
b/datafusion/functions/benches/ltrim.rs @@ -23,6 +23,7 @@ use criterion::{ black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup, Criterion, SamplingMode, }; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF}; use datafusion_functions::string; @@ -137,6 +138,8 @@ fn run_with_string_type( .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); + group.bench_function( format!( "{string_type} [size={size}, len_before={len}, len_after={remaining_len}]", @@ -149,6 +152,7 @@ fn run_with_string_type( arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index e1f609fbb35c0..f0494a9d3b4e4 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::rngs::ThreadRng; -use rand::Rng; - +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::make_date; +use rand::rngs::ThreadRng; +use rand::Rng; fn years(rng: &mut ThreadRng) -> Int32Array { let mut years = vec![]; @@ -69,6 +69,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("a", days.data_type(), true).into(), ]; let return_field = Field::new("f", DataType::Date32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); b.iter(|| { black_box( @@ -78,6 +79,7 @@ fn 
criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("make_date should work on valid values"), ) @@ -97,6 +99,8 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("a", days.data_type(), true).into(), ]; let return_field = Field::new("f", DataType::Date32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); + b.iter(|| { black_box( make_date() @@ -105,6 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("make_date should work on valid values"), ) @@ -124,6 +129,8 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("a", days.data_type(), true).into(), ]; let return_field = Field::new("f", DataType::Date32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); + b.iter(|| { black_box( make_date() @@ -132,6 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("make_date should work on valid values"), ) @@ -148,6 +156,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("a", day.data_type(), true).into(), ]; let return_field = Field::new("f", DataType::Date32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); b.iter(|| { black_box( @@ -157,6 +166,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: 1, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("make_date should work on valid values"), ) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index 4ac977af9d428..93ec687c4d0e4 100644 --- 
a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::core::nullif; @@ -40,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("nullif scalar array: {size}"), |b| { b.iter(|| { @@ -50,6 +52,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index d954ff452ed56..125559269a4f6 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -21,6 +21,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode::{lpad, rpad}; @@ -106,12 +107,14 @@ fn invoke_pad_with_args( .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); let scalar_args = ScalarFunctionArgs { args: args.clone(), arg_fields, number_rows, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), 
}; if left_pad { diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs index dc1e280b93b13..ac92aed586bae 100644 --- a/datafusion/functions/benches/random.rs +++ b/datafusion/functions/benches/random.rs @@ -19,14 +19,16 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl}; use datafusion_functions::math::random::RandomFunc; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let random_func = RandomFunc::new(); - let return_field = Field::new("f", DataType::Float64, true).into(); + let config_options = Arc::new(ConfigOptions::default()); + // Benchmark to evaluate 1M rows in batch size 8192 let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows c.bench_function("random_1M_rows_batch_8192", |b| { @@ -39,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![], number_rows: 8192, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ); @@ -59,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![], number_rows: 128, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ); diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index c0b50ad62f64a..df55748d268fc 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -23,6 +23,7 @@ use arrow::compute::cast; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_functions::regex::regexpcount::regexp_count_func; +use datafusion_functions::regex::regexpinstr::regexp_instr_func; use datafusion_functions::regex::regexplike::regexp_like; use 
datafusion_functions::regex::regexpmatch::regexp_match; use datafusion_functions::regex::regexpreplace::regexp_replace; @@ -71,6 +72,15 @@ fn start(rng: &mut ThreadRng) -> Int64Array { Int64Array::from(data) } +fn n(rng: &mut ThreadRng) -> Int64Array { + let mut data: Vec = vec![]; + for _ in 0..1000 { + data.push(rng.random_range(1..5)); + } + + Int64Array::from(data) +} + fn flags(rng: &mut ThreadRng) -> StringArray { let samples = [Some("i".to_string()), Some("im".to_string()), None]; let mut sb = StringBuilder::new(); @@ -86,6 +96,15 @@ fn flags(rng: &mut ThreadRng) -> StringArray { sb.finish() } +fn subexp(rng: &mut ThreadRng) -> Int64Array { + let mut data: Vec = vec![]; + for _ in 0..1000 { + data.push(rng.random_range(1..5)); + } + + Int64Array::from(data) +} + fn criterion_benchmark(c: &mut Criterion) { c.bench_function("regexp_count_1000 string", |b| { let mut rng = rand::rng(); @@ -127,6 +146,50 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); + c.bench_function("regexp_instr_1000 string", |b| { + let mut rng = rand::rng(); + let data = Arc::new(data(&mut rng)) as ArrayRef; + let regex = Arc::new(regex(&mut rng)) as ArrayRef; + let start = Arc::new(start(&mut rng)) as ArrayRef; + let n = Arc::new(n(&mut rng)) as ArrayRef; + let flags = Arc::new(flags(&mut rng)) as ArrayRef; + let subexp = Arc::new(subexp(&mut rng)) as ArrayRef; + + b.iter(|| { + black_box( + regexp_instr_func(&[ + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&start), + Arc::clone(&n), + Arc::clone(&flags), + Arc::clone(&subexp), + ]) + .expect("regexp_instr should work on utf8"), + ) + }) + }); + + c.bench_function("regexp_instr_1000 utf8view", |b| { + let mut rng = rand::rng(); + let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); + let regex = cast(®ex(&mut rng), &DataType::Utf8View).unwrap(); + let start = Arc::new(start(&mut rng)) as ArrayRef; + let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap(); + + b.iter(|| { + black_box( + regexp_instr_func(&[ 
+ Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&start), + Arc::clone(&flags), + ]) + .expect("regexp_instr should work on utf8view"), + ) + }) + }); + c.bench_function("regexp_like_1000", |b| { let mut rng = rand::rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 175933f5f745f..991a5a467c0e3 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; @@ -66,12 +67,14 @@ fn invoke_repeat_with_args( .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); string::repeat().invoke_with_args(ScalarFunctionArgs { args, arg_fields, number_rows: repeat_times as usize, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) } diff --git a/datafusion/functions/benches/reverse.rs b/datafusion/functions/benches/reverse.rs index 6403660113051..acac674a6de06 100644 --- a/datafusion/functions/benches/reverse.rs +++ b/datafusion/functions/benches/reverse.rs @@ -20,12 +20,15 @@ mod helper; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; +use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { // All benches are single batch run with 8192 rows let reverse = datafusion_functions::unicode::reverse(); + let 
config_options = Arc::new(ConfigOptions::default()); const N_ROWS: usize = 8192; const NULL_DENSITY: f32 = 0.1; @@ -53,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { ).into()], number_rows: N_ROWS, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -74,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: N_ROWS, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -100,6 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) { ).into()], number_rows: N_ROWS, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, @@ -123,6 +129,7 @@ fn criterion_benchmark(c: &mut Criterion) { ).into()], number_rows: N_ROWS, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }, diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 10079bcc81c7d..d56f3930d2678 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::signum; use std::sync::Arc; @@ -41,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { }) .collect::>(); let return_field = Field::new("f", DataType::Float32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("signum f32 array: {size}"), |b| { b.iter(|| { @@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ 
-78,6 +81,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index df32db1182f1f..fc31abb23d849 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::array::{StringArray, StringViewArray}; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use rand::distr::Alphanumeric; use rand::prelude::StdRng; @@ -114,6 +115,8 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields = vec![Field::new("a", args_string_ascii[0].data_type(), true).into()]; let return_field = Field::new("f", DataType::Int32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function( &format!("strpos_StringArray_ascii_str_len_{str_len}"), |b| { @@ -123,6 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -140,6 +144,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }); @@ -158,6 +163,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), })) }) }, @@ -177,6 +183,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: n_rows, return_field: Arc::clone(&return_field), + config_options: 
Arc::clone(&config_options), })) }) }, diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 342e18b0d9a2e..f14f10894649f 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode; @@ -106,12 +107,14 @@ fn invoke_substr_with_args( .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); unicode::substr().invoke_with_args(ScalarFunctionArgs { args: args.clone(), arg_fields, number_rows, return_field: Field::new("f", DataType::Utf8View, true).into(), + config_options: Arc::clone(&config_options), }) } diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index e772fb38fc400..2cc381e4545ee 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -22,13 +22,13 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, StringArray}; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_functions::unicode::substr_index; use rand::distr::{Alphanumeric, Uniform}; use rand::prelude::Distribution; use rand::Rng; -use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; -use datafusion_functions::unicode::substr_index; - struct Filter { dist: Dist, test: Test, @@ -98,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { 
Field::new(format!("arg_{idx}"), arg.data_type(), true).into() }) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); b.iter(|| { black_box( @@ -107,6 +108,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .expect("substr_index should work on valid values"), ) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index d19714ce61664..0f4cc264cbe02 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -24,14 +24,14 @@ use arrow::datatypes::{DataType, Field}; use chrono::prelude::*; use chrono::TimeDelta; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::prelude::IndexedRandom; -use rand::rngs::ThreadRng; -use rand::Rng; - +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::TimestampNanosecond; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::to_char; +use rand::prelude::IndexedRandom; +use rand::rngs::ThreadRng; +use rand::Rng; fn random_date_in_range( rng: &mut ThreadRng, @@ -81,6 +81,8 @@ fn patterns(rng: &mut ThreadRng) -> StringArray { } fn criterion_benchmark(c: &mut Criterion) { + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function("to_char_array_array_1000", |b| { let mut rng = rand::rng(); let data_arr = data(&mut rng); @@ -99,6 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .expect("to_char should work on valid values"), ) @@ -124,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + 
config_options: Arc::clone(&config_options), }) .expect("to_char should work on valid values"), ) @@ -155,6 +159,7 @@ fn criterion_benchmark(c: &mut Criterion) { ], number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .expect("to_char should work on valid values"), ) diff --git a/datafusion/functions/benches/to_hex.rs b/datafusion/functions/benches/to_hex.rs index 4a02b74ca42d1..cad9addab10ec 100644 --- a/datafusion/functions/benches/to_hex.rs +++ b/datafusion/functions/benches/to_hex.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field, Int32Type, Int64Type}; use arrow::util::bench_util::create_primitive_array; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -30,6 +31,8 @@ fn criterion_benchmark(c: &mut Criterion) { let i32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = i32_array.len(); let i32_args = vec![ColumnarValue::Array(i32_array)]; + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function(&format!("to_hex i32 array: {size}"), |b| { b.iter(|| { let args_cloned = i32_args.clone(); @@ -39,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![Field::new("a", DataType::Int32, false).into()], number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -56,6 +60,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![Field::new("a", DataType::Int64, false).into()], number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 
d898113484899..7e15d896f83e3 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -24,7 +24,7 @@ use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute::cast; use arrow::datatypes::{DataType, Field, TimeUnit}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; - +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::to_timestamp; @@ -113,6 +113,8 @@ fn criterion_benchmark(c: &mut Criterion) { Field::new("f", DataType::Timestamp(TimeUnit::Nanosecond, None), true).into(); let arg_field = Field::new("a", DataType::Utf8, false).into(); let arg_fields = vec![arg_field]; + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function("to_timestamp_no_formats_utf8", |b| { let arr_data = data(); let batch_len = arr_data.len(); @@ -126,6 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) @@ -145,6 +148,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) @@ -164,6 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) @@ -196,6 +201,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) @@ -236,6 
+242,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) @@ -277,6 +284,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: batch_len, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .expect("to_timestamp should work on valid values"), ) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 897e21c1e1d94..160eac913d2b6 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -26,6 +26,7 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::trunc; use arrow::datatypes::DataType; +use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -35,6 +36,8 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; let return_field = Field::new("f", DataType::Float32, true).into(); + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function(&format!("trunc f32 array: {size}"), |b| { b.iter(|| { black_box( @@ -44,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) @@ -62,6 +66,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index bf2c4161001e8..700f70b4b4f36 100644 --- 
a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -35,6 +36,8 @@ fn create_args(size: usize, str_len: usize) -> Vec { fn criterion_benchmark(c: &mut Criterion) { let upper = string::upper(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { @@ -45,6 +48,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![Field::new("a", DataType::Utf8, true).into()], number_rows: size, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), })) }) }); diff --git a/datafusion/functions/benches/uuid.rs b/datafusion/functions/benches/uuid.rs index 942af122562ab..f9345a97eb53c 100644 --- a/datafusion/functions/benches/uuid.rs +++ b/datafusion/functions/benches/uuid.rs @@ -19,11 +19,15 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use datafusion_functions::string; +use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let uuid = string::uuid(); + let config_options = Arc::new(ConfigOptions::default()); + c.bench_function("uuid", |b| { b.iter(|| { black_box(uuid.invoke_with_args(ScalarFunctionArgs { @@ -31,6 +35,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: vec![], number_rows: 1024, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), 
})) }) }); diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs index be49f82267121..a3d1ec82ffbb0 100644 --- a/datafusion/functions/src/core/union_extract.rs +++ b/datafusion/functions/src/core/union_extract.rs @@ -169,10 +169,11 @@ fn find_field<'a>(fields: &'a UnionFields, name: &str) -> Result<(i8, &'a FieldR #[cfg(test)] mod tests { - use arrow::datatypes::{DataType, Field, UnionFields, UnionMode}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; + use std::sync::Arc; use super::UnionExtractFun; @@ -207,6 +208,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), })?; assert_scalar(result, ScalarValue::Utf8(None)); @@ -229,6 +231,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), })?; assert_scalar(result, ScalarValue::Utf8(None)); @@ -250,6 +253,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), })?; assert_scalar(result, ScalarValue::new_utf8("42")); diff --git a/datafusion/functions/src/core/union_tag.rs b/datafusion/functions/src/core/union_tag.rs index 3a4d96de2bc03..5d589d2167cbc 100644 --- a/datafusion/functions/src/core/union_tag.rs +++ b/datafusion/functions/src/core/union_tag.rs @@ -156,6 +156,7 @@ impl ScalarUDFImpl for UnionTagFunc { mod tests { use super::UnionTagFunc; use arrow::datatypes::{DataType, Field, UnionFields, UnionMode}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; use std::sync::Arc; @@ -182,6 +183,7 @@ mod tests { number_rows: 1, 
return_field: Field::new("res", return_type, true).into(), arg_fields: vec![], + config_options: Arc::new(ConfigOptions::default()), }) .unwrap(); @@ -204,6 +206,7 @@ mod tests { number_rows: 1, return_field: Field::new("res", return_type, true).into(), arg_fields: vec![], + config_options: Arc::new(ConfigOptions::default()), }) .unwrap(); diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index b3abe246b4b3f..d68dbfc546ea5 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -98,7 +98,9 @@ impl ScalarUDFImpl for VersionFunc { mod test { use super::*; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarUDF; + use std::sync::Arc; #[tokio::test] async fn test_version_udf() { @@ -109,6 +111,7 @@ mod test { arg_fields: vec![], number_rows: 0, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }) .unwrap(); diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 1c801dfead723..d71bf31f95b9a 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -512,6 +512,7 @@ mod tests { use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use chrono::TimeDelta; + use datafusion_common::config::ConfigOptions; fn invoke_date_bin_with_args( args: Vec, @@ -528,6 +529,7 @@ mod tests { arg_fields, number_rows, return_field: Arc::clone(return_field), + config_options: Arc::new(ConfigOptions::default()), }; DateBinFunc::new().invoke_with_args(args) } diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 8963ef77a53b9..e3c7b50cb1a8f 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -488,6 +488,7 @@ mod tests { use arrow::array::{Array, 
TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, Field, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -743,6 +744,7 @@ mod tests { true, ) .into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { @@ -915,6 +917,7 @@ mod tests { true, ) .into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index c1497040261ca..16eea0be8be61 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -164,6 +164,7 @@ mod test { use crate::datetime::from_unixtime::FromUnixtimeFunc; use arrow::datatypes::TimeUnit::Second; use arrow::datatypes::{DataType, Field}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::Int64; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -177,6 +178,7 @@ mod test { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", DataType::Timestamp(Second, None), true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap(); @@ -209,6 +211,7 @@ mod test { true, ) .into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap(); diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index daa9bd83971f9..677b54cd15f00 100644 --- 
a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -122,6 +122,13 @@ impl ScalarUDFImpl for MakeDateFunc { let [years, months, days] = take_function_args(self.name(), args)?; + if matches!(years, ColumnarValue::Scalar(ScalarValue::Null)) + || matches!(months, ColumnarValue::Scalar(ScalarValue::Null)) + || matches!(days, ColumnarValue::Scalar(ScalarValue::Null)) + { + return Ok(ColumnarValue::Scalar(ScalarValue::Null)); + } + let years = years.cast_to(&Int32, None)?; let months = months.cast_to(&Int32, None)?; let days = days.cast_to(&Int32, None)?; @@ -224,6 +231,7 @@ mod tests { use crate::datetime::make_date::MakeDateFunc; use arrow::array::{Array, Date32Array, Int32Array, Int64Array, UInt32Array}; use arrow::datatypes::{DataType, Field}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; @@ -241,6 +249,7 @@ mod tests { arg_fields, number_rows, return_field: Field::new("f", DataType::Date32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; MakeDateFunc::new().invoke_with_args(args) } @@ -377,4 +386,19 @@ mod tests { "Arrow error: Cast error: Can't cast value 4294967295 to type Int32" ); } + + #[test] + fn test_make_date_null_param() { + let res = invoke_make_date_with_args( + vec![ + ColumnarValue::Scalar(ScalarValue::Null), + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::UInt32(Some(14))), + ], + 1, + ) + .expect("that make_date parsed values without error"); + + assert!(matches!(res, ColumnarValue::Scalar(ScalarValue::Null))); + } } diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index 219a9b5764237..2f7e5fa56eb13 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -306,6 +306,7 @@ mod tests { }; use 
arrow::datatypes::{DataType, Field, TimeUnit}; use chrono::{NaiveDateTime, Timelike}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; @@ -395,6 +396,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -483,6 +485,7 @@ mod tests { arg_fields, number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -619,6 +622,7 @@ mod tests { arg_fields, number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -646,6 +650,7 @@ mod tests { arg_fields, number_rows: batch_len, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -670,6 +675,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new().invoke_with_args(args); assert_eq!( @@ -690,6 +696,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToCharFunc::new().invoke_with_args(args); assert_eq!( diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index c9fd17dbef11f..d1b2720867d2f 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -162,15 +162,15 @@ impl ScalarUDFImpl for 
ToDateFunc { #[cfg(test)] mod tests { + use super::ToDateFunc; use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray}; use arrow::datatypes::{DataType, Field}; use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; - use super::ToDateFunc; - fn invoke_to_date_with_args( args: Vec, number_rows: usize, @@ -185,6 +185,7 @@ mod tests { arg_fields, number_rows, return_field: Field::new("f", DataType::Date32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; ToDateFunc::new().invoke_with_args(args) } diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index b9ebe537d459b..b6d4404d6d468 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -411,6 +411,7 @@ mod tests { use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, Field, TimeUnit}; use chrono::NaiveDateTime; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; @@ -545,6 +546,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", expected.data_type(), true).into(), + config_options: Arc::new(ConfigOptions::default()), }) .unwrap(); match res { @@ -615,6 +617,7 @@ mod tests { true, ) .into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ToLocalTimeFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 8b26a1c259505..9bd94c8ca8d94 100644 --- 
a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -19,12 +19,14 @@ use std::any::Any; use std::sync::Arc; use crate::datetime::common::*; +use arrow::array::Float64Array; use arrow::datatypes::DataType::*; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; +use datafusion_common::format::DEFAULT_CAST_OPTIONS; use datafusion_common::{exec_err, Result, ScalarType, ScalarValue}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, @@ -319,9 +321,22 @@ impl ScalarUDFImpl for ToTimestampFunc { Int32 | Int64 => args[0] .cast_to(&Timestamp(Second, None), None)? .cast_to(&Timestamp(Nanosecond, None), None), - Null | Float64 | Timestamp(_, None) => { + Null | Timestamp(_, None) => { args[0].cast_to(&Timestamp(Nanosecond, None), None) } + Float64 => { + let rescaled = arrow::compute::kernels::numeric::mul( + &args[0].to_array(1)?, + &arrow::array::Scalar::new(Float64Array::from(vec![ + 1_000_000_000f64, + ])), + )?; + Ok(ColumnarValue::Array(arrow::compute::cast_with_options( + &rescaled, + &Timestamp(Nanosecond, None), + &DEFAULT_CAST_OPTIONS, + )?)) + } Timestamp(_, Some(tz)) => { args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) } @@ -641,6 +656,7 @@ mod tests { use arrow::array::{ArrayRef, Int64Array, StringBuilder}; use arrow::datatypes::{Field, TimeUnit}; use chrono::Utc; + use datafusion_common::config::ConfigOptions; use datafusion_common::{assert_contains, DataFusionError, ScalarValue}; use datafusion_expr::ScalarFunctionImplementation; @@ -1019,6 +1035,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 4, return_field: Field::new("f", rt, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let res = udf .invoke_with_args(args) @@ -1068,6 +1085,7 
@@ mod tests { arg_fields: vec![arg_field], number_rows: 5, return_field: Field::new("f", rt, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let res = udf .invoke_with_args(args) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 23e267a323b91..186d0d3c4717c 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -260,6 +260,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::Field; use datafusion_common::cast::{as_float32_array, as_float64_array}; + use datafusion_common::config::ConfigOptions; use datafusion_common::DFSchema; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; @@ -281,6 +282,7 @@ mod tests { arg_fields, number_rows: 4, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let _ = LogFunc::new().invoke_with_args(args); } @@ -295,6 +297,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new().invoke_with_args(args); @@ -311,6 +314,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -340,6 +344,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 1, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -373,6 +378,7 @@ mod tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -406,6 +412,7 @@ mod 
tests { arg_fields, number_rows: 1, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -437,6 +444,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 4, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -471,6 +479,7 @@ mod tests { arg_fields: vec![arg_field], number_rows: 4, return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -511,6 +520,7 @@ mod tests { arg_fields, number_rows: 4, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) @@ -551,6 +561,7 @@ mod tests { arg_fields, number_rows: 4, return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = LogFunc::new() .invoke_with_args(args) diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 465844704f591..87f27f5a793c4 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -189,11 +189,11 @@ fn is_log(func: &ScalarUDF) -> bool { #[cfg(test)] mod tests { + use super::*; use arrow::array::Float64Array; use arrow::datatypes::Field; use datafusion_common::cast::{as_float64_array, as_int64_array}; - - use super::*; + use datafusion_common::config::ConfigOptions; #[test] fn test_power_f64() { @@ -213,6 +213,7 @@ mod tests { arg_fields, number_rows: 4, return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = PowerFunc::new() .invoke_with_args(args) @@ -248,6 +249,7 @@ mod tests { arg_fields, 
number_rows: 4, return_field: Field::new("f", DataType::Int64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = PowerFunc::new() .invoke_with_args(args) diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index ec6ef5a78c6a7..71d32413afc83 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -140,6 +140,7 @@ mod test { use arrow::array::{ArrayRef, Float32Array, Float64Array}; use arrow::datatypes::{DataType, Field}; use datafusion_common::cast::{as_float32_array, as_float64_array}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; use crate::math::signum::SignumFunc; @@ -163,6 +164,7 @@ mod test { arg_fields, number_rows: array.len(), return_field: Field::new("f", DataType::Float32, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = SignumFunc::new() .invoke_with_args(args) @@ -209,6 +211,7 @@ mod test { arg_fields, number_rows: array.len(), return_field: Field::new("f", DataType::Float64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = SignumFunc::new() .invoke_with_args(args) diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs index 13fbc049af582..da4e23f91de7d 100644 --- a/datafusion/functions/src/regex/mod.rs +++ b/datafusion/functions/src/regex/mod.rs @@ -17,15 +17,20 @@ //! 
"regex" DataFusion functions +use arrow::error::ArrowError; +use regex::Regex; +use std::collections::hash_map::Entry; +use std::collections::HashMap; use std::sync::Arc; - pub mod regexpcount; +pub mod regexpinstr; pub mod regexplike; pub mod regexpmatch; pub mod regexpreplace; // create UDFs make_udf_function!(regexpcount::RegexpCountFunc, regexp_count); +make_udf_function!(regexpinstr::RegexpInstrFunc, regexp_instr); make_udf_function!(regexpmatch::RegexpMatchFunc, regexp_match); make_udf_function!(regexplike::RegexpLikeFunc, regexp_like); make_udf_function!(regexpreplace::RegexpReplaceFunc, regexp_replace); @@ -60,7 +65,35 @@ pub mod expr_fn { super::regexp_match().call(args) } - /// Returns true if a has at least one match in a string, false otherwise. + /// Returns index of regular expression matches in a string. + pub fn regexp_instr( + values: Expr, + regex: Expr, + start: Option, + n: Option, + endoption: Option, + flags: Option, + subexpr: Option, + ) -> Expr { + let mut args = vec![values, regex]; + if let Some(start) = start { + args.push(start); + }; + if let Some(n) = n { + args.push(n); + }; + if let Some(endoption) = endoption { + args.push(endoption); + }; + if let Some(flags) = flags { + args.push(flags); + }; + if let Some(subexpr) = subexpr { + args.push(subexpr); + }; + super::regexp_instr().call(args) + } + /// Returns true if a regex has at least one match in a string, false otherwise. 
pub fn regexp_like(values: Expr, regex: Expr, flags: Option) -> Expr { let mut args = vec![values, regex]; if let Some(flags) = flags { @@ -89,7 +122,45 @@ pub fn functions() -> Vec> { vec![ regexp_count(), regexp_match(), + regexp_instr(), regexp_like(), regexp_replace(), ] } + +pub fn compile_and_cache_regex<'strings, 'cache>( + regex: &'strings str, + flags: Option<&'strings str>, + regex_cache: &'cache mut HashMap<(&'strings str, Option<&'strings str>), Regex>, +) -> Result<&'cache Regex, ArrowError> +where + 'strings: 'cache, +{ + let result = match regex_cache.entry((regex, flags)) { + Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + Entry::Vacant(vacant_entry) => { + let compiled = compile_regex(regex, flags)?; + vacant_entry.insert(compiled) + } + }; + Ok(result) +} + +pub fn compile_regex(regex: &str, flags: Option<&str>) -> Result { + let pattern = match flags { + None | Some("") => regex.to_string(), + Some(flags) => { + if flags.contains("g") { + return Err(ArrowError::ComputeError( + "regexp_count()/regexp_instr() does not support the global flag" + .to_string(), + )); + } + format!("(?{flags}){regex}") + } + }; + + Regex::new(&pattern).map_err(|_| { + ArrowError::ComputeError(format!("Regular expression did not compile: {pattern}")) + }) +} diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 52ab3d489ee31..a069455281bdb 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use crate::regex::{compile_and_cache_regex, compile_regex}; use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array, StringArrayType}; use arrow::datatypes::{DataType, Int64Type}; use arrow::datatypes::{ @@ -29,7 +30,6 @@ use datafusion_expr::{ use datafusion_macros::user_doc; use itertools::izip; use regex::Regex; -use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::Arc; @@ -550,42 +550,6 @@ where } } -fn compile_and_cache_regex<'strings, 'cache>( - regex: &'strings str, - flags: Option<&'strings str>, - regex_cache: &'cache mut HashMap<(&'strings str, Option<&'strings str>), Regex>, -) -> Result<&'cache Regex, ArrowError> -where - 'strings: 'cache, -{ - let result = match regex_cache.entry((regex, flags)) { - Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), - Entry::Vacant(vacant_entry) => { - let compiled = compile_regex(regex, flags)?; - vacant_entry.insert(compiled) - } - }; - Ok(result) -} - -fn compile_regex(regex: &str, flags: Option<&str>) -> Result { - let pattern = match flags { - None | Some("") => regex.to_string(), - Some(flags) => { - if flags.contains("g") { - return Err(ArrowError::ComputeError( - "regexp_count() does not support global flag".to_string(), - )); - } - format!("(?{flags}){regex}") - } - }; - - Regex::new(&pattern).map_err(|_| { - ArrowError::ComputeError(format!("Regular expression did not compile: {pattern}")) - }) -} - fn count_matches( value: Option<&str>, pattern: &Regex, @@ -617,6 +581,7 @@ mod tests { use super::*; use arrow::array::{GenericStringArray, StringViewArray}; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; #[test] @@ -662,6 +627,7 @@ mod tests { arg_fields, number_rows: args.len(), return_field: Field::new("f", Int64, true).into(), + config_options: Arc::new(ConfigOptions::default()), }) } diff --git a/datafusion/functions/src/regex/regexpinstr.rs b/datafusion/functions/src/regex/regexpinstr.rs 
new file mode 100644 index 0000000000000..577a8f5bc33d7 --- /dev/null +++ b/datafusion/functions/src/regex/regexpinstr.rs @@ -0,0 +1,824 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ + Array, ArrayRef, AsArray, Datum, Int64Array, PrimitiveArray, StringArrayType, +}; +use arrow::datatypes::{DataType, Int64Type}; +use arrow::datatypes::{ + DataType::Int64, DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View, +}; +use arrow::error::ArrowError; +use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::Exact, + TypeSignature::Uniform, Volatility, +}; +use datafusion_macros::user_doc; +use itertools::izip; +use regex::Regex; +use std::collections::HashMap; +use std::sync::Arc; + +use crate::regex::compile_and_cache_regex; + +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns the position in a string where the specified occurrence of a POSIX regular expression is located.", + syntax_example = "regexp_instr(str, regexp[, start[, N[, flags[, subexpr]]]])", + sql_example = r#"```sql +> SELECT regexp_instr('ABCDEF', 'C(.)(..)'); 
++---------------------------------------------------------------+ +| regexp_instr(Utf8("ABCDEF"),Utf8("C(.)(..)")) | ++---------------------------------------------------------------+ +| 3 | ++---------------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "regexp", prefix = "Regular"), + argument( + name = "start", + description = "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function. Defaults to 1" + ), + argument( + name = "N", + description = "- **N**: Optional The N-th occurrence of pattern to find. Defaults to 1 (first match). Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ), + argument( + name = "subexpr", + description = "Optional Specifies which capture group (subexpression) to return the position for. Defaults to 0, which returns the position of the entire match." 
+ ) +)] +#[derive(Debug)] +pub struct RegexpInstrFunc { + signature: Signature, +} + +impl Default for RegexpInstrFunc { + fn default() -> Self { + Self::new() + } +} + +impl RegexpInstrFunc { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + Uniform(2, vec![Utf8View, LargeUtf8, Utf8]), + Exact(vec![Utf8View, Utf8View, Int64]), + Exact(vec![LargeUtf8, LargeUtf8, Int64]), + Exact(vec![Utf8, Utf8, Int64]), + Exact(vec![Utf8View, Utf8View, Int64, Int64]), + Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64]), + Exact(vec![Utf8, Utf8, Int64, Int64]), + Exact(vec![Utf8View, Utf8View, Int64, Int64, Utf8View]), + Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64, LargeUtf8]), + Exact(vec![Utf8, Utf8, Int64, Int64, Utf8]), + Exact(vec![Utf8View, Utf8View, Int64, Int64, Utf8View, Int64]), + Exact(vec![LargeUtf8, LargeUtf8, Int64, Int64, LargeUtf8, Int64]), + Exact(vec![Utf8, Utf8, Int64, Int64, Utf8, Int64]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for RegexpInstrFunc { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "regexp_instr" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Int64) + } + + fn invoke_with_args( + &self, + args: datafusion_expr::ScalarFunctionArgs, + ) -> Result { + let args = &args.args; + + let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + let inferred_length = len.unwrap_or(1); + let args = args + .iter() + .map(|arg| arg.to_array(inferred_length)) + .collect::>>()?; + + let result = regexp_instr_func(&args); + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } + } + + 
fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +pub fn regexp_instr_func(args: &[ArrayRef]) -> Result { + let args_len = args.len(); + if !(2..=6).contains(&args_len) { + return exec_err!("regexp_instr was called with {args_len} arguments. It requires at least 2 and at most 6."); + } + + let values = &args[0]; + match values.data_type() { + Utf8 | LargeUtf8 | Utf8View => (), + other => { + return internal_err!( + "Unsupported data type {other:?} for function regexp_instr" + ); + } + } + + regexp_instr( + values, + &args[1], + if args_len > 2 { Some(&args[2]) } else { None }, + if args_len > 3 { Some(&args[3]) } else { None }, + if args_len > 4 { Some(&args[4]) } else { None }, + if args_len > 5 { Some(&args[5]) } else { None }, + ) + .map_err(|e| e.into()) +} + +/// `arrow-rs` style implementation of `regexp_instr` function. +/// This function `regexp_instr` is responsible for returning the index of a regular expression pattern +/// within a string array. It supports optional start positions and flags for case insensitivity. +/// +/// The function accepts a variable number of arguments: +/// - `values`: The array of strings to search within. +/// - `regex_array`: The array of regular expression patterns to search for. +/// - `start_array` (optional): The array of start positions for the search. +/// - `nth_array` (optional): The array of start nth for the search. +/// - `endoption_array` (optional): The array of endoption positions for the search. +/// - `flags_array` (optional): The array of flags to modify the search behavior (e.g., case insensitivity). +/// - `subexpr_array` (optional): The array of subexpr positions for the search. +/// +/// The function handles different combinations of scalar and array inputs for the regex patterns, start positions, +/// and flags. It uses a cache to store compiled regular expressions for efficiency. 
+/// +/// # Errors +/// Returns an error if the input arrays have mismatched lengths or if the regular expression fails to compile. +pub fn regexp_instr( + values: &dyn Array, + regex_array: &dyn Datum, + start_array: Option<&dyn Datum>, + nth_array: Option<&dyn Datum>, + flags_array: Option<&dyn Datum>, + subexpr_array: Option<&dyn Datum>, +) -> Result { + let (regex_array, _) = regex_array.get(); + let start_array = start_array.map(|start| { + let (start, _) = start.get(); + start + }); + let nth_array = nth_array.map(|nth| { + let (nth, _) = nth.get(); + nth + }); + let flags_array = flags_array.map(|flags| { + let (flags, _) = flags.get(); + flags + }); + let subexpr_array = subexpr_array.map(|subexpr| { + let (subexpr, _) = subexpr.get(); + subexpr + }); + + match (values.data_type(), regex_array.data_type(), flags_array) { + (Utf8, Utf8, None) => regexp_instr_inner( + values.as_string::(), + regex_array.as_string::(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + None, + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + (Utf8, Utf8, Some(flags_array)) if *flags_array.data_type() == Utf8 => regexp_instr_inner( + values.as_string::(), + regex_array.as_string::(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + Some(flags_array.as_string::()), + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + (LargeUtf8, LargeUtf8, None) => regexp_instr_inner( + values.as_string::(), + regex_array.as_string::(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + None, + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + (LargeUtf8, LargeUtf8, Some(flags_array)) if *flags_array.data_type() == LargeUtf8 => regexp_instr_inner( + values.as_string::(), + regex_array.as_string::(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + 
Some(flags_array.as_string::()), + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + (Utf8View, Utf8View, None) => regexp_instr_inner( + values.as_string_view(), + regex_array.as_string_view(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + None, + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + (Utf8View, Utf8View, Some(flags_array)) if *flags_array.data_type() == Utf8View => regexp_instr_inner( + values.as_string_view(), + regex_array.as_string_view(), + start_array.map(|start| start.as_primitive::()), + nth_array.map(|nth| nth.as_primitive::()), + Some(flags_array.as_string_view()), + subexpr_array.map(|subexpr| subexpr.as_primitive::()), + ), + _ => Err(ArrowError::ComputeError( + "regexp_instr() expected the input arrays to be of type Utf8, LargeUtf8, or Utf8View and the data types of the values, regex_array, and flags_array to match".to_string(), + )), + } +} + +#[allow(clippy::too_many_arguments)] +pub fn regexp_instr_inner<'a, S>( + values: S, + regex_array: S, + start_array: Option<&Int64Array>, + nth_array: Option<&Int64Array>, + flags_array: Option, + subexp_array: Option<&Int64Array>, +) -> Result +where + S: StringArrayType<'a>, +{ + let len = values.len(); + + let default_start_array = PrimitiveArray::::from(vec![1; len]); + let start_array = start_array.unwrap_or(&default_start_array); + let start_input: Vec = (0..start_array.len()) + .map(|i| start_array.value(i)) // handle nulls as 0 + .collect(); + + let default_nth_array = PrimitiveArray::::from(vec![1; len]); + let nth_array = nth_array.unwrap_or(&default_nth_array); + let nth_input: Vec = (0..nth_array.len()) + .map(|i| nth_array.value(i)) // handle nulls as 0 + .collect(); + + let flags_input = match flags_array { + Some(flags) => flags.iter().collect(), + None => vec![None; len], + }; + + let default_subexp_array = PrimitiveArray::::from(vec![0; len]); + let subexp_array = 
subexp_array.unwrap_or(&default_subexp_array); + let subexp_input: Vec = (0..subexp_array.len()) + .map(|i| subexp_array.value(i)) // handle nulls as 0 + .collect(); + + let mut regex_cache = HashMap::new(); + + let result: Result>, ArrowError> = izip!( + values.iter(), + regex_array.iter(), + start_input.iter(), + nth_input.iter(), + flags_input.iter(), + subexp_input.iter() + ) + .map(|(value, regex, start, nth, flags, subexp)| match regex { + None => Ok(None), + Some("") => Ok(Some(0)), + Some(regex) => get_index( + value, + regex, + *start, + *nth, + *subexp, + *flags, + &mut regex_cache, + ), + }) + .collect(); + Ok(Arc::new(Int64Array::from(result?))) +} + +fn handle_subexp( + pattern: &Regex, + search_slice: &str, + subexpr: i64, + value: &str, + byte_start_offset: usize, +) -> Result, ArrowError> { + if let Some(captures) = pattern.captures(search_slice) { + if let Some(matched) = captures.get(subexpr as usize) { + // Convert byte offset relative to search_slice back to 1-based character offset + // relative to the original `value` string. + let start_char_offset = + value[..byte_start_offset + matched.start()].chars().count() as i64 + 1; + return Ok(Some(start_char_offset)); + } + } + Ok(Some(0)) // Return 0 if the subexpression was not found +} + +fn get_nth_match( + pattern: &Regex, + search_slice: &str, + n: i64, + byte_start_offset: usize, + value: &str, +) -> Result, ArrowError> { + if let Some(mat) = pattern.find_iter(search_slice).nth((n - 1) as usize) { + // Convert byte offset relative to search_slice back to 1-based character offset + // relative to the original `value` string. 
+ let match_start_byte_offset = byte_start_offset + mat.start(); + let match_start_char_offset = + value[..match_start_byte_offset].chars().count() as i64 + 1; + Ok(Some(match_start_char_offset)) + } else { + Ok(Some(0)) // Return 0 if the N-th match was not found + } +} +fn get_index<'strings, 'cache>( + value: Option<&str>, + pattern: &'strings str, + start: i64, + n: i64, + subexpr: i64, + flags: Option<&'strings str>, + regex_cache: &'cache mut HashMap<(&'strings str, Option<&'strings str>), Regex>, +) -> Result, ArrowError> +where + 'strings: 'cache, +{ + let value = match value { + None => return Ok(None), + Some("") => return Ok(Some(0)), + Some(value) => value, + }; + let pattern: &Regex = compile_and_cache_regex(pattern, flags, regex_cache)?; + // println!("get_index: value = {}, pattern = {}, start = {}, n = {}, subexpr = {}, flags = {:?}", value, pattern, start, n, subexpr, flags); + if start < 1 { + return Err(ArrowError::ComputeError( + "regexp_instr() requires start to be 1-based".to_string(), + )); + } + + if n < 1 { + return Err(ArrowError::ComputeError( + "N must be 1 or greater".to_string(), + )); + } + + // --- Simplified byte_start_offset calculation --- + let total_chars = value.chars().count() as i64; + let byte_start_offset: usize = if start > total_chars { + // If start is beyond the total characters, it means we start searching + // after the string effectively. No matches possible. 
+ return Ok(Some(0)); + } else { + // Get the byte offset for the (start - 1)-th character (0-based) + value + .char_indices() + .nth((start - 1) as usize) + .map(|(idx, _)| idx) + .unwrap_or(0) // Should not happen if start is valid and <= total_chars + }; + // --- End simplified calculation --- + + let search_slice = &value[byte_start_offset..]; + + // Handle subexpression capturing first, as it takes precedence + if subexpr > 0 { + return handle_subexp(pattern, search_slice, subexpr, value, byte_start_offset); + } + + // Use nth to get the N-th match (n is 1-based, nth is 0-based) + get_nth_match(pattern, search_slice, n, byte_start_offset, value) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::Int64Array; + use arrow::array::{GenericStringArray, StringViewArray}; + use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; + use datafusion_expr::ScalarFunctionArgs; + #[test] + fn test_regexp_instr() { + test_case_sensitive_regexp_instr_nulls(); + test_case_sensitive_regexp_instr_scalar(); + test_case_sensitive_regexp_instr_scalar_start(); + test_case_sensitive_regexp_instr_scalar_nth(); + test_case_sensitive_regexp_instr_scalar_subexp(); + + test_case_sensitive_regexp_instr_array::>(); + test_case_sensitive_regexp_instr_array::>(); + test_case_sensitive_regexp_instr_array::(); + + test_case_sensitive_regexp_instr_array_start::>(); + test_case_sensitive_regexp_instr_array_start::>(); + test_case_sensitive_regexp_instr_array_start::(); + + test_case_sensitive_regexp_instr_array_nth::>(); + test_case_sensitive_regexp_instr_array_nth::>(); + test_case_sensitive_regexp_instr_array_nth::(); + } + + fn regexp_instr_with_scalar_values(args: &[ScalarValue]) -> Result { + let args_values: Vec = args + .iter() + .map(|sv| ColumnarValue::Scalar(sv.clone())) + .collect(); + + let arg_fields = args + .iter() + .enumerate() + .map(|(idx, a)| { + Arc::new(Field::new(format!("arg_{idx}"), a.data_type(), true)) + }) + .collect::>(); + + 
RegexpInstrFunc::new().invoke_with_args(ScalarFunctionArgs { + args: args_values, + arg_fields, + number_rows: args.len(), + return_field: Arc::new(Field::new("f", Int64, true)), + config_options: Arc::new(ConfigOptions::default()), + }) + } + + fn test_case_sensitive_regexp_instr_nulls() { + let v = ""; + let r = ""; + let expected = 0; + let regex_sv = ScalarValue::Utf8(Some(r.to_string())); + let re = regexp_instr_with_scalar_values(&[v.to_string().into(), regex_sv]); + // let res_exp = re.unwrap(); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, Some(expected), "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + } + fn test_case_sensitive_regexp_instr_scalar() { + let values = [ + "hello world", + "abcdefg", + "xyz123xyz", + "no match here", + "abc", + "ДатаФусион数据融合📊🔥", + ]; + let regex = ["o", "d", "123", "z", "gg", "📊"]; + + let expected: Vec = vec![5, 4, 4, 0, 0, 15]; + + izip!(values.iter(), regex.iter()) + .enumerate() + .for_each(|(pos, (&v, &r))| { + // utf8 + let v_sv = ScalarValue::Utf8(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8(Some(r.to_string())); + let expected = expected.get(pos).cloned(); + let re = regexp_instr_with_scalar_values(&[v_sv, regex_sv]); + // let res_exp = re.unwrap(); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // largeutf8 + let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); + let regex_sv = ScalarValue::LargeUtf8(Some(r.to_string())); + let re = regexp_instr_with_scalar_values(&[v_sv, regex_sv]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // utf8view + let v_sv = ScalarValue::Utf8View(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8View(Some(r.to_string())); + let re = 
regexp_instr_with_scalar_values(&[v_sv, regex_sv]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + }); + } + + fn test_case_sensitive_regexp_instr_scalar_start() { + let values = ["abcabcabc", "abcabcabc", ""]; + let regex = ["abc", "abc", "gg"]; + let start = [4, 5, 5]; + let expected: Vec = vec![4, 7, 0]; + + izip!(values.iter(), regex.iter(), start.iter()) + .enumerate() + .for_each(|(pos, (&v, &r, &s))| { + // utf8 + let v_sv = ScalarValue::Utf8(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let expected = expected.get(pos).cloned(); + let re = + regexp_instr_with_scalar_values(&[v_sv, regex_sv, start_sv.clone()]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // largeutf8 + let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); + let regex_sv = ScalarValue::LargeUtf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let re = + regexp_instr_with_scalar_values(&[v_sv, regex_sv, start_sv.clone()]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // utf8view + let v_sv = ScalarValue::Utf8View(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8View(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let re = + regexp_instr_with_scalar_values(&[v_sv, regex_sv, start_sv.clone()]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + }); + } + + fn test_case_sensitive_regexp_instr_scalar_nth() { + let values = ["abcabcabc", "abcabcabc", 
"abcabcabc", "abcabcabc"]; + let regex = ["abc", "abc", "abc", "abc"]; + let start = [1, 1, 1, 1]; + let nth = [1, 2, 3, 4]; + let expected: Vec = vec![1, 4, 7, 0]; + + izip!(values.iter(), regex.iter(), start.iter(), nth.iter()) + .enumerate() + .for_each(|(pos, (&v, &r, &s, &n))| { + // utf8 + let v_sv = ScalarValue::Utf8(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let expected = expected.get(pos).cloned(); + let re = regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // largeutf8 + let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); + let regex_sv = ScalarValue::LargeUtf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let re = regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // utf8view + let v_sv = ScalarValue::Utf8View(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8View(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let re = regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + }); + } + + fn test_case_sensitive_regexp_instr_scalar_subexp() { + let values = ["12 abc def ghi 34"]; + let regex = ["(abc) (def) (ghi)"]; + let 
start = [1]; + let nth = [1]; + let flags = ["i"]; + let subexps = [2]; + let expected: Vec = vec![8]; + + izip!( + values.iter(), + regex.iter(), + start.iter(), + nth.iter(), + flags.iter(), + subexps.iter() + ) + .enumerate() + .for_each(|(pos, (&v, &r, &s, &n, &flag, &subexp))| { + // utf8 + let v_sv = ScalarValue::Utf8(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let flags_sv = ScalarValue::Utf8(Some(flag.to_string())); + let subexp_sv = ScalarValue::Int64(Some(subexp)); + let expected = expected.get(pos).cloned(); + let re = regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + flags_sv, + subexp_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // largeutf8 + let v_sv = ScalarValue::LargeUtf8(Some(v.to_string())); + let regex_sv = ScalarValue::LargeUtf8(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let flags_sv = ScalarValue::LargeUtf8(Some(flag.to_string())); + let subexp_sv = ScalarValue::Int64(Some(subexp)); + let re = regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + flags_sv, + subexp_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + + // utf8view + let v_sv = ScalarValue::Utf8View(Some(v.to_string())); + let regex_sv = ScalarValue::Utf8View(Some(r.to_string())); + let start_sv = ScalarValue::Int64(Some(s)); + let nth_sv = ScalarValue::Int64(Some(n)); + let flags_sv = ScalarValue::Utf8View(Some(flag.to_string())); + let subexp_sv = ScalarValue::Int64(Some(subexp)); + let re = 
regexp_instr_with_scalar_values(&[ + v_sv, + regex_sv, + start_sv.clone(), + nth_sv.clone(), + flags_sv, + subexp_sv.clone(), + ]); + match re { + Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { + assert_eq!(v, expected, "regexp_instr scalar test failed"); + } + _ => panic!("Unexpected result"), + } + }); + } + + fn test_case_sensitive_regexp_instr_array() + where + A: From> + Array + 'static, + { + let values = A::from(vec![ + "hello world", + "abcdefg", + "xyz123xyz", + "no match here", + "", + ]); + let regex = A::from(vec!["o", "d", "123", "z", "gg"]); + + let expected = Int64Array::from(vec![5, 4, 4, 0, 0]); + let re = regexp_instr_func(&[Arc::new(values), Arc::new(regex)]).unwrap(); + assert_eq!(re.as_ref(), &expected); + } + + fn test_case_sensitive_regexp_instr_array_start() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["abcabcabc", "abcabcabc", ""]); + let regex = A::from(vec!["abc", "abc", "gg"]); + let start = Int64Array::from(vec![4, 5, 5]); + let expected = Int64Array::from(vec![4, 7, 0]); + + let re = regexp_instr_func(&[Arc::new(values), Arc::new(regex), Arc::new(start)]) + .unwrap(); + assert_eq!(re.as_ref(), &expected); + } + + fn test_case_sensitive_regexp_instr_array_nth() + where + A: From> + Array + 'static, + { + let values = A::from(vec!["abcabcabc", "abcabcabc", "abcabcabc", "abcabcabc"]); + let regex = A::from(vec!["abc", "abc", "abc", "abc"]); + let start = Int64Array::from(vec![1, 1, 1, 1]); + let nth = Int64Array::from(vec![1, 2, 3, 4]); + let expected = Int64Array::from(vec![1, 4, 7, 0]); + + let re = regexp_instr_func(&[ + Arc::new(values), + Arc::new(regex), + Arc::new(start), + Arc::new(nth), + ]) + .unwrap(); + assert_eq!(re.as_ref(), &expected); + } +} diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index a811de7fccf06..718f96e4f145b 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -49,6 +49,8 @@ pub fn 
chr(args: &[ArrayRef]) -> Result { Some(integer) => { if integer == 0 { return exec_err!("null character not permitted."); + } else if integer < 0 { + return exec_err!("negative input not permitted."); } else { match core::char::from_u32(integer as u32) { Some(c) => { @@ -132,3 +134,123 @@ impl ScalarUDFImpl for ChrFunc { self.doc() } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::{Array, Int64Array, StringArray}; + use datafusion_common::assert_contains; + + #[test] + fn test_chr_normal() { + let input = Arc::new(Int64Array::from(vec![ + Some(65), // A + Some(66), // B + Some(67), // C + Some(128640), // 🚀 + Some(8364), // € + Some(945), // α + None, // NULL + Some(32), // space + Some(10), // newline + Some(9), // tab + Some(0x10FFFF), // 0x10FFFF, the largest Unicode code point + ])); + let result = chr(&[input]).unwrap(); + let string_array = result.as_any().downcast_ref::().unwrap(); + let expected = [ + "A", + "B", + "C", + "🚀", + "€", + "α", + "", + " ", + "\n", + "\t", + "\u{10ffff}", + ]; + + assert_eq!(string_array.len(), 11); + for (i, e) in expected.iter().enumerate() { + assert_eq!(string_array.value(i), *e); + } + } + + #[test] + fn test_chr_error() { + // chr(0) returns an error + let input = Arc::new(Int64Array::from(vec![0])); + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "null character not permitted" + ); + + // invalid Unicode code points (too large) + let input = Arc::new(Int64Array::from(vec![i64::MAX])); + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "requested character too large for encoding" + ); + + // invalid Unicode code points (too large) case 2 + let input = Arc::new(Int64Array::from(vec![0x10FFFF + 1])); + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "requested character too large for encoding" + ); + + // 
invalid Unicode code points (surrogate code point) + // link: + let input = Arc::new(Int64Array::from(vec![0xD800 + 1])); + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "requested character too large for encoding" + ); + + // negative input + let input = Arc::new(Int64Array::from(vec![i64::MIN + 2i64])); // will be 2 if cast to u32 + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "negative input not permitted" + ); + + // negative input case 2 + let input = Arc::new(Int64Array::from(vec![-1])); + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "negative input not permitted" + ); + + // one error with valid values after + let input = Arc::new(Int64Array::from(vec![65, 0, 66])); // A, NULL_CHAR, B + let result = chr(&[input]); + assert!(result.is_err()); + assert_contains!( + result.err().unwrap().to_string(), + "null character not permitted" + ); + } + + #[test] + fn test_chr_empty() { + // empty input array + let input = Arc::new(Int64Array::from(Vec::::new())); + let result = chr(&[input]).unwrap(); + let string_array = result.as_any().downcast_ref::().unwrap(); + assert_eq!(string_array.len(), 0); + } +} diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 64a527eac1988..06ec82f1b3ed0 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -377,6 +377,7 @@ mod tests { use arrow::array::{Array, LargeStringArray, StringViewArray}; use arrow::array::{ArrayRef, StringArray}; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use DataType::*; #[test] @@ -485,6 +486,7 @@ mod tests { arg_fields, number_rows: 3, return_field: Field::new("f", Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = 
ConcatFunc::new().invoke_with_args(args)?; diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 1f45f8501e1f4..c1ecac7ae99d9 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -409,6 +409,7 @@ mod tests { use arrow::array::{Array, ArrayRef, StringArray}; use arrow::datatypes::DataType::Utf8; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; @@ -493,6 +494,7 @@ mod tests { arg_fields, number_rows: 3, return_field: Field::new("f", Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ConcatWsFunc::new().invoke_with_args(args)?; @@ -529,6 +531,7 @@ mod tests { arg_fields, number_rows: 3, return_field: Field::new("f", Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = ConcatWsFunc::new().invoke_with_args(args)?; diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 215f8f7a25b91..b2aefb8ee374a 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -153,6 +153,7 @@ mod test { use crate::expr_fn::contains; use arrow::array::{BooleanArray, StringArray}; use arrow::datatypes::{DataType, Field}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl}; use std::sync::Arc; @@ -175,6 +176,7 @@ mod test { arg_fields, number_rows: 2, return_field: Field::new("f", DataType::Boolean, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let actual = udf.invoke_with_args(args).unwrap(); diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 
536c29a7cb253..139275892933a 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -100,6 +100,7 @@ mod tests { use arrow::array::{Array, ArrayRef, StringArray}; use arrow::datatypes::DataType::Utf8; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn to_lower(input: ArrayRef, expected: ArrayRef) -> Result<()> { @@ -111,6 +112,7 @@ mod tests { args: vec![ColumnarValue::Array(input)], arg_fields, return_field: Field::new("f", Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = match func.invoke_with_args(args)? { diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 882fb45eda4af..99d505c5aafda 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -99,6 +99,7 @@ mod tests { use arrow::array::{Array, ArrayRef, StringArray}; use arrow::datatypes::DataType::Utf8; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn to_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> { @@ -110,6 +111,7 @@ mod tests { args: vec![ColumnarValue::Array(input)], arg_fields: vec![arg_field], return_field: Field::new("f", Utf8, true).into(), + config_options: Arc::new(ConfigOptions::default()), }; let result = match func.invoke_with_args(args)? 
{ diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 8b00c7be1ccf8..3429a8293c154 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -349,6 +349,7 @@ mod tests { use crate::utils::test::test_function; use arrow::array::{Array, Int32Array, StringArray}; use arrow::datatypes::{DataType::Int32, Field}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; use std::sync::Arc; @@ -483,6 +484,7 @@ mod tests { arg_fields, number_rows: cardinality, return_field: Field::new("f", return_type, true).into(), + config_options: Arc::new(ConfigOptions::default()), }); assert!(result.is_ok()); diff --git a/datafusion/functions/src/unicode/initcap.rs b/datafusion/functions/src/unicode/initcap.rs index c9b0cb77b0969..b70c00e757ee7 100644 --- a/datafusion/functions/src/unicode/initcap.rs +++ b/datafusion/functions/src/unicode/initcap.rs @@ -131,10 +131,11 @@ fn initcap(args: &[ArrayRef]) -> Result { string_array.value_data().len(), ); + let mut container = String::new(); string_array.iter().for_each(|str| match str { Some(s) => { - let initcap_str = initcap_string(s); - builder.append_value(initcap_str); + initcap_string(s, &mut container); + builder.append_value(&container); } None => builder.append_null(), }); @@ -147,10 +148,11 @@ fn initcap_utf8view(args: &[ArrayRef]) -> Result { let mut builder = StringViewBuilder::with_capacity(string_view_array.len()); + let mut container = String::new(); string_view_array.iter().for_each(|str| match str { Some(s) => { - let initcap_str = initcap_string(s); - builder.append_value(initcap_str); + initcap_string(s, &mut container); + builder.append_value(&container); } None => builder.append_null(), }); @@ -158,31 +160,29 @@ fn initcap_utf8view(args: &[ArrayRef]) -> Result { Ok(Arc::new(builder.finish()) 
as ArrayRef) } -fn initcap_string(input: &str) -> String { - let mut result = String::with_capacity(input.len()); +fn initcap_string(input: &str, container: &mut String) { + container.clear(); let mut prev_is_alphanumeric = false; if input.is_ascii() { for c in input.chars() { if prev_is_alphanumeric { - result.push(c.to_ascii_lowercase()); + container.push(c.to_ascii_lowercase()); } else { - result.push(c.to_ascii_uppercase()); + container.push(c.to_ascii_uppercase()); }; prev_is_alphanumeric = c.is_ascii_alphanumeric(); } } else { for c in input.chars() { if prev_is_alphanumeric { - result.extend(c.to_lowercase()); + container.extend(c.to_lowercase()); } else { - result.extend(c.to_uppercase()); + container.extend(c.to_uppercase()); } prev_is_alphanumeric = c.is_alphanumeric(); } } - - result } #[cfg(test)] diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 583ff48bff39d..0e9ef8dacd51a 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -128,8 +128,9 @@ pub mod test { /// $EXPECTED_TYPE is the expected value type /// $EXPECTED_DATA_TYPE is the expected result type /// $ARRAY_TYPE is the column type after function applied + /// $CONFIG_OPTIONS config options to pass to function macro_rules! 
test_function { - ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => { + ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident, $CONFIG_OPTIONS:expr) => { let expected: Result> = $EXPECTED; let func = $FUNC; @@ -174,7 +175,13 @@ pub mod test { let return_type = return_field.data_type(); assert_eq!(return_type, &$EXPECTED_DATA_TYPE); - let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, arg_fields, number_rows: cardinality, return_field}); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{ + args: $ARGS, + arg_fields, + number_rows: cardinality, + return_field, + config_options: $CONFIG_OPTIONS + }); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().to_array(cardinality).expect("Failed to convert to array"); @@ -198,7 +205,13 @@ pub mod test { let return_field = return_field.unwrap(); // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, arg_fields, number_rows: cardinality, return_field}) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{ + args: $ARGS, + arg_fields, + number_rows: cardinality, + return_field, + config_options: $CONFIG_OPTIONS}) + { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); @@ -208,6 +221,18 @@ pub mod test { } }; }; + + ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => { + test_function!( + $FUNC, + $ARGS, + $EXPECTED, + $EXPECTED_TYPE, + $EXPECTED_DATA_TYPE, + $ARRAY_TYPE, + std::sync::Arc::new(datafusion_common::config::ConfigOptions::default()) + ) + }; } use arrow::datatypes::DataType; diff --git 
a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index b5a3e9a2d5853..a98b0fdcc3d36 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1597,7 +1597,7 @@ mod test { vec![lit(10i64)], false, None, - None, + vec![], None, )); let plan = LogicalPlan::Projection(Projection::try_new(vec![udaf], empty)?); @@ -1632,7 +1632,7 @@ mod test { vec![lit("10")], false, None, - None, + vec![], None, )); @@ -1651,7 +1651,7 @@ mod test { vec![lit(12f64)], false, None, - None, + vec![], None, )); let plan = LogicalPlan::Projection(Projection::try_new(vec![agg_expr], empty)?); @@ -1670,7 +1670,7 @@ mod test { vec![cast(col("a"), DataType::Float64)], false, None, - None, + vec![], None, )); let plan = LogicalPlan::Projection(Projection::try_new(vec![agg_expr], empty)?); @@ -1692,7 +1692,7 @@ mod test { vec![lit("1")], false, None, - None, + vec![], None, )); let err = Projection::try_new(vec![agg_expr], empty) @@ -1727,7 +1727,7 @@ mod test { let empty = empty_with_type(DataType::Int64); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); assert_analyzed_plan_eq!( - plan, + plan, @r" Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)]) EmptyRelation @@ -1744,7 +1744,7 @@ mod test { })); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); assert_analyzed_plan_eq!( - plan, + plan, @r" Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))]) EmptyRelation diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 6a49e5d22087f..88d51e1adea34 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -909,7 +909,7 @@ mod test { vec![inner], false, None, 
- None, + vec![], None, )) }; diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 023ee4ea5a84b..7b7be82b70ca0 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -29,9 +29,9 @@ use datafusion_common::{ HashMap, JoinType, Result, }; use datafusion_expr::expr::Alias; -use datafusion_expr::Unnest; use datafusion_expr::{ - logical_plan::LogicalPlan, Aggregate, Distinct, Expr, Projection, TableScan, Window, + logical_plan::LogicalPlan, Aggregate, Distinct, Expr, Projection, TableScan, Unnest, + Window, }; use crate::optimize_projections::required_indices::RequiredIndices; @@ -376,11 +376,22 @@ fn optimize_projections( ); } LogicalPlan::Unnest(Unnest { - dependency_indices, .. + input, + dependency_indices, + .. }) => { - vec![RequiredIndices::new_from_indices( - dependency_indices.clone(), - )] + // at least provide the indices for the exec-columns as a starting point + let required_indices = + RequiredIndices::new().with_plan_exprs(&plan, input.schema())?; + + // Add additional required indices from the parent + let mut additional_necessary_child_indices = Vec::new(); + indices.indices().iter().for_each(|idx| { + if let Some(index) = dependency_indices.get(*idx) { + additional_necessary_child_indices.push(*index); + } + }); + vec![required_indices.append(&additional_necessary_child_indices)] } }; diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index bcb867f6e7fa5..35ec7d074d5f4 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -20,6 +20,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use arrow::datatypes::DataType; use indexmap::IndexSet; use itertools::Itertools; @@ -875,14 +876,37 @@ impl OptimizerRule for PushDownFilter { let predicates = 
split_conjunction_owned(filter.predicate.clone()); let mut non_unnest_predicates = vec![]; let mut unnest_predicates = vec![]; + let mut unnest_struct_columns = vec![]; + + for idx in &unnest.struct_type_columns { + let (sub_qualifier, field) = + unnest.input.schema().qualified_field(*idx); + let field_name = field.name().clone(); + + if let DataType::Struct(children) = field.data_type() { + for child in children { + let child_name = child.name().clone(); + unnest_struct_columns.push(Column::new( + sub_qualifier.cloned(), + format!("{field_name}.{child_name}"), + )); + } + } + } + for predicate in predicates { // collect all the Expr::Column in predicate recursively let mut accum: HashSet = HashSet::new(); expr_to_columns(&predicate, &mut accum)?; - if unnest.list_type_columns.iter().any(|(_, unnest_list)| { - accum.contains(&unnest_list.output_column) - }) { + let contains_list_columns = + unnest.list_type_columns.iter().any(|(_, unnest_list)| { + accum.contains(&unnest_list.output_column) + }); + let contains_struct_columns = + unnest_struct_columns.iter().any(|c| accum.contains(c)); + + if contains_list_columns || contains_struct_columns { unnest_predicates.push(predicate); } else { non_unnest_predicates.push(predicate); diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 26ac4a30b7047..9a3a8bcd23a7f 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2181,6 +2181,7 @@ mod tests { }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; + use std::hash::{DefaultHasher, Hash, Hasher}; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, @@ -2432,7 +2433,7 @@ mod tests { #[test] fn test_simplify_multiply_by_null() { - let null = Expr::Literal(ScalarValue::Null, None); + 
let null = lit(ScalarValue::Null); // A * null --> null { let expr = col("c2") * null.clone(); @@ -4322,7 +4323,7 @@ mod tests { vec![], false, None, - None, + vec![], None, )); @@ -4336,7 +4337,7 @@ mod tests { vec![], false, None, - None, + vec![], None, )); @@ -4404,6 +4405,21 @@ mod tests { None } } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { simplify } = self; + simplify == &other.simplify + } + + fn hash_value(&self) -> u64 { + let Self { simplify } = self; + let mut hasher = DefaultHasher::new(); + simplify.hash(&mut hasher); + hasher.finish() + } } #[test] @@ -4474,6 +4490,22 @@ mod tests { fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { unimplemented!("not needed for tests") } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { simplify } = self; + simplify == &other.simplify + } + + fn hash_value(&self) -> u64 { + let Self { simplify } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + simplify.hash(&mut hasher); + hasher.finish() + } } #[derive(Debug)] struct VolatileUdf { diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 4df0e125eb18c..2f7dadcebaa49 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -17,6 +17,7 @@ //! Utility functions for expression simplification +use arrow::datatypes::i256; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::{ expr::{Between, BinaryExpr, InList}, @@ -150,6 +151,11 @@ pub fn is_zero(s: &Expr) -> bool { Expr::Literal(ScalarValue::Float32(Some(v)), _) if *v == 0. => true, Expr::Literal(ScalarValue::Float64(Some(v)), _) if *v == 0. 
=> true, Expr::Literal(ScalarValue::Decimal128(Some(v), _p, _s), _) if *v == 0 => true, + Expr::Literal(ScalarValue::Decimal256(Some(v), _p, _s), _) + if *v == i256::ZERO => + { + true + } _ => false, } } @@ -173,6 +179,13 @@ pub fn is_one(s: &Expr) -> bool { .map(|x| x == v) .unwrap_or_default() } + Expr::Literal(ScalarValue::Decimal256(Some(v), _p, s), _) => { + *s >= 0 + && match i256::from(10).checked_pow(*s as u32) { + Some(res) => res == *v, + None => false, + } + } _ => false, } } @@ -365,3 +378,78 @@ pub fn distribute_negation(expr: Expr) -> Expr { _ => Expr::Negative(Box::new(expr)), } } + +#[cfg(test)] +mod tests { + use super::{is_one, is_zero}; + use arrow::datatypes::i256; + use datafusion_common::ScalarValue; + use datafusion_expr::lit; + + #[test] + fn test_is_zero() { + assert!(is_zero(&lit(ScalarValue::Int8(Some(0))))); + assert!(is_zero(&lit(ScalarValue::Float32(Some(0.0))))); + assert!(is_zero(&lit(ScalarValue::Decimal128( + Some(i128::from(0)), + 9, + 0 + )))); + assert!(is_zero(&lit(ScalarValue::Decimal128( + Some(i128::from(0)), + 9, + 5 + )))); + assert!(is_zero(&lit(ScalarValue::Decimal256( + Some(i256::ZERO), + 9, + 0 + )))); + assert!(is_zero(&lit(ScalarValue::Decimal256( + Some(i256::ZERO), + 9, + 5 + )))); + } + + #[test] + fn test_is_one() { + assert!(is_one(&lit(ScalarValue::Int8(Some(1))))); + assert!(is_one(&lit(ScalarValue::Float32(Some(1.0))))); + assert!(is_one(&lit(ScalarValue::Decimal128( + Some(i128::from(1)), + 9, + 0 + )))); + assert!(is_one(&lit(ScalarValue::Decimal128( + Some(i128::from(10)), + 9, + 1 + )))); + assert!(is_one(&lit(ScalarValue::Decimal128( + Some(i128::from(100)), + 9, + 2 + )))); + assert!(is_one(&lit(ScalarValue::Decimal256( + Some(i256::from(1)), + 9, + 0 + )))); + assert!(is_one(&lit(ScalarValue::Decimal256( + Some(i256::from(10)), + 9, + 1 + )))); + assert!(is_one(&lit(ScalarValue::Decimal256( + Some(i256::from(100)), + 9, + 2 + )))); + assert!(!is_one(&lit(ScalarValue::Decimal256( + 
Some(i256::from(100)), + 9, + -1 + )))); + } +} diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 50783a214342e..e9a23c7c4dc50 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -79,7 +79,7 @@ fn is_single_distinct_agg(aggr_expr: &[Expr]) -> Result { }, }) = expr { - if filter.is_some() || order_by.is_some() { + if filter.is_some() || !order_by.is_empty() { return Ok(false); } aggregate_count += 1; @@ -200,7 +200,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { vec![col(SINGLE_DISTINCT_ALIAS)], false, // intentional to remove distinct here None, - None, + vec![], None, ))) // if the aggregate function is not distinct, we need to rewrite it like two phase aggregation @@ -213,7 +213,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { args, false, None, - None, + vec![], None, )) .alias(&alias_str), @@ -223,7 +223,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { vec![col(&alias_str)], false, None, - None, + vec![], None, ))) } @@ -296,7 +296,7 @@ mod tests { vec![expr], true, None, - None, + vec![], None, )) } @@ -627,7 +627,7 @@ mod tests { vec![col("a")], false, Some(Box::new(col("a").gt(lit(5)))), - None, + vec![], None, )); let plan = LogicalPlanBuilder::from(table_scan) @@ -678,7 +678,7 @@ mod tests { vec![col("a")], false, None, - Some(vec![col("a").sort(true, false)]), + vec![col("a").sort(true, false)], None, )); let plan = LogicalPlanBuilder::from(table_scan) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 9175c01274cba..ed304811826d7 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -616,10 +616,42 @@ impl AggregateFunctionExpr { /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. 
pub fn with_new_expressions( &self, - _args: Vec>, - _order_by_exprs: Vec>, + args: Vec>, + order_by_exprs: Vec>, ) -> Option { - None + if args.len() != self.args.len() + || (self.order_sensitivity() != AggregateOrderSensitivity::Insensitive + && order_by_exprs.len() != self.order_bys.len()) + { + return None; + } + + let new_order_bys = self + .order_bys + .iter() + .zip(order_by_exprs) + .map(|(req, new_expr)| PhysicalSortExpr { + expr: new_expr, + options: req.options, + }) + .collect(); + + Some(AggregateFunctionExpr { + fun: self.fun.clone(), + args, + return_field: Arc::clone(&self.return_field), + name: self.name.clone(), + // TODO: Human name should be updated after re-write to not mislead + human_display: self.human_display.clone(), + schema: self.schema.clone(), + order_bys: new_order_bys, + ignore_nulls: self.ignore_nulls, + ordering_fields: self.ordering_fields.clone(), + is_distinct: self.is_distinct, + is_reversed: false, + input_fields: self.input_fields.clone(), + is_nullable: self.is_nullable, + }) } /// If this function is max, return (output_field, true) diff --git a/datafusion/physical-expr/src/async_scalar_function.rs b/datafusion/physical-expr/src/async_scalar_function.rs index 547b9c13da622..00134565ea443 100644 --- a/datafusion/physical-expr/src/async_scalar_function.rs +++ b/datafusion/physical-expr/src/async_scalar_function.rs @@ -114,7 +114,7 @@ impl AsyncFuncExpr { pub async fn invoke_with_args( &self, batch: &RecordBatch, - option: &ConfigOptions, + config_options: Arc, ) -> Result { let Some(scalar_function_expr) = self.func.as_any().downcast_ref::() @@ -162,15 +162,13 @@ impl AsyncFuncExpr { .collect::>>()?; result_batches.push( async_udf - .invoke_async_with_args( - ScalarFunctionArgs { - args, - arg_fields: arg_fields.clone(), - number_rows: current_batch.num_rows(), - return_field: Arc::clone(&self.return_field), - }, - option, - ) + .invoke_async_with_args(ScalarFunctionArgs { + args, + arg_fields: arg_fields.clone(), + 
number_rows: current_batch.num_rows(), + return_field: Arc::clone(&self.return_field), + config_options: Arc::clone(&config_options), + }) .await?, ); } @@ -183,15 +181,13 @@ impl AsyncFuncExpr { result_batches.push( async_udf - .invoke_async_with_args( - ScalarFunctionArgs { - args: args.to_vec(), - arg_fields, - number_rows: batch.num_rows(), - return_field: Arc::clone(&self.return_field), - }, - option, - ) + .invoke_async_with_args(ScalarFunctionArgs { + args: args.to_vec(), + arg_fields, + number_rows: batch.num_rows(), + return_field: Arc::clone(&self.return_field), + config_options: Arc::clone(&config_options), + }) .await?, ); } diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 875c2a76e5eb2..aa65c4a80ae9a 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -338,6 +338,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; @@ -390,16 +391,19 @@ mod tests { Arc::clone(&test_fun), vec![Arc::clone(col_a)], &test_schema, + Arc::new(ConfigOptions::default()), )?) as PhysicalExprRef; let floor_f = Arc::new(ScalarFunctionExpr::try_new( Arc::clone(&test_fun), vec![Arc::clone(col_f)], &test_schema, + Arc::new(ConfigOptions::default()), )?) as PhysicalExprRef; let exp_a = Arc::new(ScalarFunctionExpr::try_new( Arc::clone(&test_fun), vec![Arc::clone(col_a)], &test_schema, + Arc::new(ConfigOptions::default()), )?) 
as PhysicalExprRef; let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs index 38bb1fef8074a..6fe56052292f0 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/equivalence/projection.rs @@ -167,6 +167,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; #[test] @@ -689,6 +690,7 @@ mod tests { test_fun, vec![Arc::clone(col_c)], &schema, + Arc::new(ConfigOptions::default()), )?) as PhysicalExprRef; let option_asc = SortOptions { diff --git a/datafusion/physical-expr/src/equivalence/properties/dependency.rs b/datafusion/physical-expr/src/equivalence/properties/dependency.rs index 4554e36f766dc..26d5d32c65121 100644 --- a/datafusion/physical-expr/src/equivalence/properties/dependency.rs +++ b/datafusion/physical-expr/src/equivalence/properties/dependency.rs @@ -396,6 +396,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{Constraint, Constraints, Result}; use datafusion_expr::sort_properties::SortProperties; use datafusion_expr::Operator; @@ -1035,6 +1036,7 @@ mod tests { concat(), vec![Arc::clone(&col_a), Arc::clone(&col_b)], Field::new("f", DataType::Utf8, true).into(), + Arc::new(ConfigOptions::default()), )); // Assume existing ordering is [c ASC, a ASC, b ASC] @@ -1125,6 +1127,7 @@ mod tests { concat(), vec![Arc::clone(&col_a), Arc::clone(&col_b)], Field::new("f", DataType::Utf8, true).into(), + Arc::new(ConfigOptions::default()), )) as _; // Assume existing ordering is [concat(a, b) ASC, a ASC, b ASC] diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 
798e68a459ce6..eff948c6a0f43 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -387,8 +387,8 @@ impl PhysicalExpr for BinaryExpr { let input_schema = schema.as_ref(); if left_data_type.is_nested() { - if right_data_type != left_data_type { - return internal_err!("type mismatch"); + if !left_data_type.equals_datatype(&right_data_type) { + return internal_err!("Cannot evaluate binary expression because of type mismatch: left {}, right {} ", left_data_type, right_data_type); } return apply_cmp_for_nested(self.op, &lhs, &rhs); } @@ -5399,4 +5399,65 @@ mod tests { Interval::make(Some(false), Some(false)).unwrap() ); } + + #[test] + fn test_evaluate_nested_type() { + let batch_schema = Arc::new(Schema::new(vec![ + Field::new( + "a", + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + true, + ), + Field::new( + "b", + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + true, + ), + ])); + + let mut list_builder_a = ListBuilder::new(Int32Builder::new()); + + list_builder_a.append_value([Some(1)]); + list_builder_a.append_value([Some(2)]); + list_builder_a.append_value([]); + list_builder_a.append_value([None]); + + let list_array_a: ArrayRef = Arc::new(list_builder_a.finish()); + + let mut list_builder_b = ListBuilder::new(Int32Builder::new()); + + list_builder_b.append_value([Some(1)]); + list_builder_b.append_value([Some(2)]); + list_builder_b.append_value([]); + list_builder_b.append_value([None]); + + let list_array_b: ArrayRef = Arc::new(list_builder_b.finish()); + + let batch = + RecordBatch::try_new(batch_schema, vec![list_array_a, list_array_b]).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new( + "a", + DataType::List(Arc::new(Field::new("foo", DataType::Int32, true))), + true, + ), + Field::new( + "b", + DataType::List(Arc::new(Field::new("bar", DataType::Int32, true))), + true, + ), + ])); + + let a = Arc::new(Column::new("a", 
0)) as _; + let b = Arc::new(Column::new("b", 1)) as _; + + let eq_expr = + binary_expr(Arc::clone(&a), Operator::Eq, Arc::clone(&b), &schema).unwrap(); + + let eq_result = eq_expr.evaluate(&batch).unwrap(); + let expected = + BooleanArray::from_iter(vec![Some(true), Some(true), Some(true), Some(true)]); + assert_eq!(eq_result.into_array(4).unwrap().as_boolean(), &expected); + } } diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 5a11783a87e90..c9f3fb00f019e 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -204,7 +204,6 @@ mod test { use arrow::array::StringArray; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; - use datafusion_common::Result; use std::sync::Arc; @@ -214,8 +213,9 @@ mod test { let col = Column::new("id", 9); let error = col.data_type(&schema).expect_err("error").strip_backtrace(); assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) + but input schema only has 1 columns: [\"foo\"].\nThis issue was likely caused by a bug \ + in DataFusion's code. 
Please help us to resolve this by filing a bug report \ + in our issue tracker: https://github.com/apache/datafusion/issues".starts_with(&error)) } #[test] @@ -224,20 +224,21 @@ mod test { let col = Column::new("id", 9); let error = col.nullable(&schema).expect_err("error").strip_backtrace(); assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)) + but input schema only has 1 columns: [\"foo\"].\nThis issue was likely caused by a bug \ + in DataFusion's code. Please help us to resolve this by filing a bug report \ + in our issue tracker: https://github.com/apache/datafusion/issues".starts_with(&error)); } #[test] - fn out_of_bounds_evaluate() -> Result<()> { + fn out_of_bounds_evaluate() { let schema = Schema::new(vec![Field::new("foo", DataType::Utf8, true)]); let data: StringArray = vec!["data"].into(); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data)])?; + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data)]).unwrap(); let col = Column::new("id", 9); let error = col.evaluate(&batch).expect_err("error").strip_backtrace(); assert!("Internal error: PhysicalExpr Column references column 'id' at index 9 (zero-based) \ - but input schema only has 1 columns: [\"foo\"].\nThis was likely caused by a bug in \ - DataFusion's code and we would welcome that you file an bug report in our issue tracker".starts_with(&error)); - Ok(()) + but input schema only has 1 columns: [\"foo\"].\nThis issue was likely caused by a bug \ + in DataFusion's code. 
Please help us to resolve this by filing a bug report \ + in our issue tracker: https://github.com/apache/datafusion/issues".starts_with(&error)); } } diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index 1a2ebf000f1df..6e425ee439d69 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -36,7 +36,7 @@ use datafusion_expr_common::interval_arithmetic::Interval; use datafusion_expr_common::sort_properties::{ExprProperties, SortProperties}; /// Represents a literal value -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Literal { value: ScalarValue, field: FieldRef, diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index b593dfe83209d..c5a58d5c6d853 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use crate::PhysicalExpr; use arrow::compute; -use arrow::compute::{cast_with_options, CastOptions}; +use arrow::compute::CastOptions; use arrow::datatypes::{DataType, FieldRef, Schema}; use arrow::record_batch::RecordBatch; use compute::can_cast_types; use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; -use datafusion_common::{not_impl_err, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::ColumnarValue; /// TRY_CAST expression casts an expression to a specific data type and returns NULL on invalid cast @@ -96,18 +96,7 @@ impl PhysicalExpr for TryCastExpr { safe: true, format_options: DEFAULT_FORMAT_OPTIONS, }; - match value { - ColumnarValue::Array(array) => { - let cast = cast_with_options(&array, &self.cast_type, &options)?; - Ok(ColumnarValue::Array(cast)) - } - ColumnarValue::Scalar(scalar) => { - let array = scalar.to_array()?; - let cast_array = 
cast_with_options(&array, &self.cast_type, &options)?; - let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?; - Ok(ColumnarValue::Scalar(cast_scalar)) - } - } + value.cast_to(&self.cast_type, Some(&options)) } fn return_field(&self, input_schema: &Schema) -> Result { diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index 03fc77f156d95..845c358d7e58b 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -70,7 +70,7 @@ pub use datafusion_physical_expr_common::sort_expr::{ pub use planner::{create_physical_expr, create_physical_exprs}; pub use scalar_function::ScalarFunctionExpr; -pub use schema_rewriter::PhysicalExprSchemaRewriter; +pub use schema_rewriter::DefaultPhysicalExprAdapter; pub use utils::{conjunction, conjunction_opt, split_conjunction}; // For backwards compatibility diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index fbc19b1202ee8..fb491341f81d6 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -24,6 +24,7 @@ use crate::{ }; use arrow::datatypes::Schema; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema, }; @@ -317,11 +318,16 @@ pub fn create_physical_expr( Expr::ScalarFunction(ScalarFunction { func, args }) => { let physical_args = create_physical_exprs(args, input_dfschema, execution_props)?; + let config_options = match execution_props.config_options.as_ref() { + Some(config_options) => Arc::clone(config_options), + None => Arc::new(ConfigOptions::default()), + }; Ok(Arc::new(ScalarFunctionExpr::try_new( Arc::clone(func), physical_args, input_schema, + config_options, )?)) } Expr::Between(Between { diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index d014bbb74caa1..f2bb09b1009c5 100644 --- 
a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -31,7 +31,7 @@ use std::any::Any; use std::fmt::{self, Debug, Formatter}; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::sync::Arc; use crate::expressions::Literal; @@ -39,6 +39,7 @@ use crate::PhysicalExpr; use arrow::array::{Array, RecordBatch}; use arrow::datatypes::{DataType, FieldRef, Schema}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; @@ -46,14 +47,16 @@ use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; use datafusion_expr::{ expr_vec_fmt, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, }; +use datafusion_physical_expr_common::physical_expr::{DynEq, DynHash}; +use itertools::Itertools; /// Physical expression of a scalar function -#[derive(Eq, PartialEq, Hash)] pub struct ScalarFunctionExpr { fun: Arc, name: String, args: Vec>, return_field: FieldRef, + config_options: Arc, } impl Debug for ScalarFunctionExpr { @@ -74,12 +77,14 @@ impl ScalarFunctionExpr { fun: Arc, args: Vec>, return_field: FieldRef, + config_options: Arc, ) -> Self { Self { fun, name: name.to_owned(), args, return_field, + config_options, } } @@ -88,6 +93,7 @@ impl ScalarFunctionExpr { fun: Arc, args: Vec>, schema: &Schema, + config_options: Arc, ) -> Result { let name = fun.name().to_string(); let arg_fields = args @@ -120,6 +126,7 @@ impl ScalarFunctionExpr { name, args, return_field, + config_options, }) } @@ -156,6 +163,10 @@ impl ScalarFunctionExpr { pub fn nullable(&self) -> bool { self.return_field.is_nullable() } + + pub fn config_options(&self) -> &ConfigOptions { + &self.config_options + } } impl fmt::Display for ScalarFunctionExpr { @@ -164,6 +175,42 @@ impl fmt::Display for ScalarFunctionExpr { } } +impl DynEq for ScalarFunctionExpr { + fn 
dyn_eq(&self, other: &dyn Any) -> bool { + other.downcast_ref::().is_some_and(|o| { + self.fun.eq(&o.fun) + && self.name.eq(&o.name) + && self.args.eq(&o.args) + && self.return_field.eq(&o.return_field) + && self + .config_options + .entries() + .iter() + .sorted_by(|&l, &r| l.key.cmp(&r.key)) + .zip( + o.config_options + .entries() + .iter() + .sorted_by(|&l, &r| l.key.cmp(&r.key)), + ) + .filter(|(l, r)| l.ne(r)) + .count() + == 0 + }) + } +} + +impl DynHash for ScalarFunctionExpr { + fn dyn_hash(&self, mut state: &mut dyn Hasher) { + self.type_id().hash(&mut state); + self.fun.hash(&mut state); + self.name.hash(&mut state); + self.args.hash(&mut state); + self.return_field.hash(&mut state); + self.config_options.entries().hash(&mut state); + } +} + impl PhysicalExpr for ScalarFunctionExpr { /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { @@ -202,6 +249,7 @@ impl PhysicalExpr for ScalarFunctionExpr { arg_fields, number_rows: batch.num_rows(), return_field: Arc::clone(&self.return_field), + config_options: Arc::clone(&self.config_options), })?; if let ColumnarValue::Array(array) = &output { @@ -238,6 +286,7 @@ impl PhysicalExpr for ScalarFunctionExpr { Arc::clone(&self.fun), children, Arc::clone(&self.return_field), + Arc::clone(&self.config_options), ))) } diff --git a/datafusion/physical-expr/src/schema_rewriter.rs b/datafusion/physical-expr/src/schema_rewriter.rs index b8759ea16d6e8..d622ce4bc01eb 100644 --- a/datafusion/physical-expr/src/schema_rewriter.rs +++ b/datafusion/physical-expr/src/schema_rewriter.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::compute::can_cast_types; -use arrow::datatypes::{FieldRef, Schema}; +use arrow::datatypes::{FieldRef, Schema, SchemaRef}; use datafusion_common::{ exec_err, tree_node::{Transformed, TransformedResult, TreeNode}, @@ -30,69 +30,192 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use crate::expressions::{self, CastExpr, Column}; -/// Builder 
for rewriting physical expressions to match different schemas. +/// Trait for adapting physical expressions to match a target schema. +/// +/// This is used in file scans to rewrite expressions so that they can be evaluated +/// against the physical schema of the file being scanned. It allows for handling +/// differences between logical and physical schemas, such as type mismatches or missing columns. +/// +/// You can create a custom implemention of this trait to handle specific rewriting logic. +/// For example, to fill in missing columns with default values instead of nulls: +/// +/// ```rust +/// use datafusion_physical_expr::schema_rewriter::{PhysicalExprAdapter, PhysicalExprAdapterFactory}; +/// use arrow::datatypes::{Schema, Field, DataType, FieldRef, SchemaRef}; +/// use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +/// use datafusion_common::{Result, ScalarValue, tree_node::{Transformed, TransformedResult, TreeNode}}; +/// use datafusion_physical_expr::expressions::{self, Column}; +/// use std::sync::Arc; +/// +/// #[derive(Debug)] +/// pub struct CustomPhysicalExprAdapter { +/// logical_file_schema: SchemaRef, +/// physical_file_schema: SchemaRef, +/// } +/// +/// impl PhysicalExprAdapter for CustomPhysicalExprAdapter { +/// fn rewrite(&self, expr: Arc) -> Result> { +/// expr.transform(|expr| { +/// if let Some(column) = expr.as_any().downcast_ref::() { +/// // Check if the column exists in the physical schema +/// if self.physical_file_schema.index_of(column.name()).is_err() { +/// // If the column is missing, fill it with a default value instead of null +/// // The default value could be stored in the table schema's column metadata for example. 
+/// let default_value = ScalarValue::Int32(Some(0)); +/// return Ok(Transformed::yes(expressions::lit(default_value))); +/// } +/// } +/// // If the column exists, return it as is +/// Ok(Transformed::no(expr)) +/// }).data() +/// } +/// +/// fn with_partition_values( +/// &self, +/// partition_values: Vec<(FieldRef, ScalarValue)>, +/// ) -> Arc { +/// // For simplicity, this example ignores partition values +/// Arc::new(CustomPhysicalExprAdapter { +/// logical_file_schema: self.logical_file_schema.clone(), +/// physical_file_schema: self.physical_file_schema.clone(), +/// }) +/// } +/// } +/// +/// #[derive(Debug)] +/// pub struct CustomPhysicalExprAdapterFactory; +/// +/// impl PhysicalExprAdapterFactory for CustomPhysicalExprAdapterFactory { +/// fn create( +/// &self, +/// logical_file_schema: SchemaRef, +/// physical_file_schema: SchemaRef, +/// ) -> Arc { +/// Arc::new(CustomPhysicalExprAdapter { +/// logical_file_schema, +/// physical_file_schema, +/// }) +/// } +/// } +/// ``` +pub trait PhysicalExprAdapter: Send + Sync + std::fmt::Debug { + /// Rewrite a physical expression to match the target schema. + /// + /// This method should return a transformed expression that matches the target schema. + /// + /// Arguments: + /// - `expr`: The physical expression to rewrite. + /// - `logical_file_schema`: The logical schema of the table being queried, excluding any partition columns. + /// - `physical_file_schema`: The physical schema of the file being scanned. + /// - `partition_values`: Optional partition values to use for rewriting partition column references. + /// These are handled as if they were columns appended onto the logical file schema. + /// + /// Returns: + /// - `Arc`: The rewritten physical expression that can be evaluated against the physical schema. 
+ fn rewrite(&self, expr: Arc) -> Result>; + + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc; +} + +pub trait PhysicalExprAdapterFactory: Send + Sync + std::fmt::Debug { + /// Create a new instance of the physical expression adapter. + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc; +} + +#[derive(Debug, Clone)] +pub struct DefaultPhysicalExprAdapterFactory; + +impl PhysicalExprAdapterFactory for DefaultPhysicalExprAdapterFactory { + fn create( + &self, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc { + Arc::new(DefaultPhysicalExprAdapter { + logical_file_schema, + physical_file_schema, + partition_values: Vec::new(), + }) + } +} + +/// Default implementation for rewriting physical expressions to match different schemas. /// /// # Example /// /// ```rust -/// use datafusion_physical_expr::schema_rewriter::PhysicalExprSchemaRewriter; +/// use datafusion_physical_expr::schema_rewriter::{DefaultPhysicalExprAdapterFactory, PhysicalExprAdapterFactory}; /// use arrow::datatypes::Schema; +/// use std::sync::Arc; /// /// # fn example( /// # predicate: std::sync::Arc, /// # physical_file_schema: &Schema, /// # logical_file_schema: &Schema, /// # ) -> datafusion_common::Result<()> { -/// let rewriter = PhysicalExprSchemaRewriter::new(physical_file_schema, logical_file_schema); -/// let adapted_predicate = rewriter.rewrite(predicate)?; +/// let factory = DefaultPhysicalExprAdapterFactory; +/// let adapter = factory.create(Arc::new(logical_file_schema.clone()), Arc::new(physical_file_schema.clone())); +/// let adapted_predicate = adapter.rewrite(predicate)?; /// # Ok(()) /// # } /// ``` -pub struct PhysicalExprSchemaRewriter<'a> { - physical_file_schema: &'a Schema, - logical_file_schema: &'a Schema, - partition_fields: Vec, - partition_values: Vec, +#[derive(Debug, Clone)] +pub struct DefaultPhysicalExprAdapter { + logical_file_schema: 
SchemaRef, + physical_file_schema: SchemaRef, + partition_values: Vec<(FieldRef, ScalarValue)>, } -impl<'a> PhysicalExprSchemaRewriter<'a> { - /// Create a new schema rewriter with the given schemas - pub fn new( - physical_file_schema: &'a Schema, - logical_file_schema: &'a Schema, - ) -> Self { +impl DefaultPhysicalExprAdapter { + /// Create a new instance of the default physical expression adapter. + /// + /// This adapter rewrites expressions to match the physical schema of the file being scanned, + /// handling type mismatches and missing columns by filling them with default values. + pub fn new(logical_file_schema: SchemaRef, physical_file_schema: SchemaRef) -> Self { Self { - physical_file_schema, logical_file_schema, - partition_fields: Vec::new(), + physical_file_schema, partition_values: Vec::new(), } } +} - /// Add partition columns and their corresponding values - /// - /// When a column reference matches a partition field, it will be replaced - /// with the corresponding literal value from partition_values. - pub fn with_partition_columns( - mut self, - partition_fields: Vec, - partition_values: Vec, - ) -> Self { - self.partition_fields = partition_fields; - self.partition_values = partition_values; - self +impl PhysicalExprAdapter for DefaultPhysicalExprAdapter { + fn rewrite(&self, expr: Arc) -> Result> { + let rewriter = DefaultPhysicalExprAdapterRewriter { + logical_file_schema: &self.logical_file_schema, + physical_file_schema: &self.physical_file_schema, + partition_fields: &self.partition_values, + }; + expr.transform(|expr| rewriter.rewrite_expr(Arc::clone(&expr))) + .data() } - /// Rewrite the given physical expression to match the target schema - /// - /// This method applies the following transformations: - /// 1. Replaces partition column references with literal values - /// 2. Handles missing columns by inserting null literals - /// 3. 
Casts columns when logical and physical schemas have different types - pub fn rewrite(&self, expr: Arc) -> Result> { - expr.transform(|expr| self.rewrite_expr(expr)).data() + fn with_partition_values( + &self, + partition_values: Vec<(FieldRef, ScalarValue)>, + ) -> Arc { + Arc::new(DefaultPhysicalExprAdapter { + partition_values, + ..self.clone() + }) } +} +struct DefaultPhysicalExprAdapterRewriter<'a> { + logical_file_schema: &'a Schema, + physical_file_schema: &'a Schema, + partition_fields: &'a [(FieldRef, ScalarValue)], +} + +impl<'a> DefaultPhysicalExprAdapterRewriter<'a> { fn rewrite_expr( &self, expr: Arc, @@ -109,7 +232,7 @@ impl<'a> PhysicalExprSchemaRewriter<'a> { expr: Arc, column: &Column, ) -> Result>> { - // Get the logical field for this column + // Get the logical field for this column if it exists in the logical schema let logical_field = match self.logical_file_schema.field_with_name(column.name()) { Ok(field) => field, @@ -118,10 +241,22 @@ impl<'a> PhysicalExprSchemaRewriter<'a> { if let Some(partition_value) = self.get_partition_value(column.name()) { return Ok(Transformed::yes(expressions::lit(partition_value))); } - // If the column is not found in the logical schema and is not a partition value, return an error - // This should probably never be hit unless something upstream broke, but nontheless it's better - // for us to return a handleable error than to panic / do something unexpected. - return Err(e.into()); + // This can be hit if a custom rewrite injected a reference to a column that doesn't exist in the logical schema. + // For example, a pre-computed column that is kept only in the physical schema. + // If the column exists in the physical schema, we can still use it. + if let Ok(physical_field) = + self.physical_file_schema.field_with_name(column.name()) + { + // If the column exists in the physical schema, we can use it in place of the logical column. 
+ // This is nice to users because if they do a rewrite that results in something like `phyiscal_int32_col = 123u64` + // we'll at least handle the casts for them. + physical_field + } else { + // A completely unknown column that doesn't exist in either schema! + // This should probably never be hit unless something upstream broke, but nontheless it's better + // for us to return a handleable error than to panic / do something unexpected. + return Err(e.into()); + } } }; @@ -190,7 +325,6 @@ impl<'a> PhysicalExprSchemaRewriter<'a> { fn get_partition_value(&self, column_name: &str) -> Option { self.partition_fields .iter() - .zip(self.partition_values.iter()) .find(|(field, _)| field.name() == column_name) .map(|(_, value)| value.clone()) } @@ -229,10 +363,11 @@ mod tests { fn test_rewrite_column_with_type_cast() { let (physical_schema, logical_schema) = create_test_schema(); - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); let column_expr = Arc::new(Column::new("a", 0)); - let result = rewriter.rewrite(column_expr).unwrap(); + let result = adapter.rewrite(column_expr).unwrap(); // Should be wrapped in a cast expression assert!(result.as_any().downcast_ref::().is_some()); @@ -241,7 +376,8 @@ mod tests { #[test] fn test_rewrite_mulit_column_expr_with_type_cast() { let (physical_schema, logical_schema) = create_test_schema(); - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); // Create a complex expression: (a + 5) OR (c > 0.0) that tests the recursive case of the rewriter let column_a = Arc::new(Column::new("a", 0)) as Arc; @@ -261,7 +397,7 @@ mod tests { )), ); - let result = rewriter.rewrite(Arc::new(expr)).unwrap(); + let result 
= adapter.rewrite(Arc::new(expr)).unwrap(); println!("Rewritten expression: {result}"); let expected = expressions::BinaryExpr::new( @@ -294,10 +430,11 @@ mod tests { fn test_rewrite_missing_column() -> Result<()> { let (physical_schema, logical_schema) = create_test_schema(); - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); let column_expr = Arc::new(Column::new("c", 2)); - let result = rewriter.rewrite(column_expr)?; + let result = adapter.rewrite(column_expr)?; // Should be replaced with a literal null if let Some(literal) = result.as_any().downcast_ref::() { @@ -313,15 +450,17 @@ mod tests { fn test_rewrite_partition_column() -> Result<()> { let (physical_schema, logical_schema) = create_test_schema(); - let partition_fields = - vec![Arc::new(Field::new("partition_col", DataType::Utf8, false))]; - let partition_values = vec![ScalarValue::Utf8(Some("test_value".to_string()))]; + let partition_field = + Arc::new(Field::new("partition_col", DataType::Utf8, false)); + let partition_value = ScalarValue::Utf8(Some("test_value".to_string())); + let partition_values = vec![(partition_field, partition_value)]; - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema) - .with_partition_columns(partition_fields, partition_values); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); + let adapter = adapter.with_partition_values(partition_values); let column_expr = Arc::new(Column::new("partition_col", 0)); - let result = rewriter.rewrite(column_expr)?; + let result = adapter.rewrite(column_expr)?; // Should be replaced with the partition value if let Some(literal) = result.as_any().downcast_ref::() { @@ -340,10 +479,11 @@ mod tests { fn test_rewrite_no_change_needed() -> Result<()> { let 
(physical_schema, logical_schema) = create_test_schema(); - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); let column_expr = Arc::new(Column::new("b", 1)) as Arc; - let result = rewriter.rewrite(Arc::clone(&column_expr))?; + let result = adapter.rewrite(Arc::clone(&column_expr))?; // Should be the same expression (no transformation needed) // We compare the underlying pointer through the trait object @@ -363,10 +503,11 @@ mod tests { Field::new("b", DataType::Utf8, false), // Non-nullable missing column ]); - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = factory.create(Arc::new(logical_schema), Arc::new(physical_schema)); let column_expr = Arc::new(Column::new("b", 1)); - let result = rewriter.rewrite(column_expr); + let result = adapter.rewrite(column_expr); assert!(result.is_err()); assert!(result .unwrap_err() @@ -398,7 +539,7 @@ mod tests { } } - /// Example showing how we can use the `PhysicalExprSchemaRewriter` to adapt RecordBatches during a scan + /// Example showing how we can use the `DefaultPhysicalExprAdapter` to adapt RecordBatches during a scan /// to apply projections, type conversions and handling of missing columns all at once. 
#[test] fn test_adapt_batches() { @@ -420,11 +561,13 @@ mod tests { col("a", &logical_schema).unwrap(), ]; - let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, &logical_schema); + let factory = DefaultPhysicalExprAdapterFactory; + let adapter = + factory.create(Arc::clone(&logical_schema), Arc::clone(&physical_schema)); let adapted_projection = projection .into_iter() - .map(|expr| rewriter.rewrite(expr).unwrap()) + .map(|expr| adapter.rewrite(expr).unwrap()) .collect_vec(); let adapted_schema = Arc::new(Schema::new( diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs index 8092dc3c1a614..8a57cc7b7c154 100644 --- a/datafusion/physical-expr/src/utils/guarantee.rs +++ b/datafusion/physical-expr/src/utils/guarantee.rs @@ -129,35 +129,15 @@ impl LiteralGuarantee { .as_any() .downcast_ref::() { - // Only support single-column inlist currently, multi-column inlist is not supported - let col = inlist - .expr() - .as_any() - .downcast_ref::(); - let Some(col) = col else { - return builder; - }; - - let literals = inlist - .list() - .iter() - .map(|e| e.as_any().downcast_ref::()) - .collect::>>(); - let Some(literals) = literals else { - return builder; - }; - - let guarantee = if inlist.negated() { - Guarantee::NotIn + if let Some(inlist) = ColInList::try_new(inlist) { + builder.aggregate_multi_conjunct( + inlist.col, + inlist.guarantee, + inlist.list.iter().map(|lit| lit.value()), + ) } else { - Guarantee::In - }; - - builder.aggregate_multi_conjunct( - col, - guarantee, - literals.iter().map(|e| e.value()), - ) + builder + } } else { // split disjunction: OR OR ... 
let disjunctions = split_disjunction(expr); @@ -184,16 +164,6 @@ impl LiteralGuarantee { .filter_map(|expr| ColOpLit::try_new(expr)) .collect::>(); - if terms.is_empty() { - return builder; - } - - // if not all terms are of the form (col literal), - // can't infer any guarantees - if terms.len() != disjunctions.len() { - return builder; - } - // if all terms are 'col literal' with the same column // and operation we can infer any guarantees // @@ -203,18 +173,70 @@ impl LiteralGuarantee { // foo is required for the expression to be true. // So we can only create a multi value guarantee for `=` // (or a single value). (e.g. ignore `a != foo OR a != bar`) - let first_term = &terms[0]; - if terms.iter().all(|term| { - term.col.name() == first_term.col.name() - && term.guarantee == Guarantee::In - }) { + let first_term = terms.first(); + if !terms.is_empty() + && terms.len() == disjunctions.len() + && terms.iter().all(|term| { + term.col.name() == first_term.unwrap().col.name() + && term.guarantee == Guarantee::In + }) + { builder.aggregate_multi_conjunct( - first_term.col, + first_term.unwrap().col, Guarantee::In, terms.iter().map(|term| term.lit.value()), ) } else { - // can't infer anything + // Handle disjunctions with conjunctions like (a = 1 AND b = 2) OR (a = 2 AND b = 3) + // Extract termsets from each disjunction + // if in each termset, they have same column, and the guarantee is In, + // we can infer a guarantee for the column + // e.g. 
(a = 1 AND b = 2) OR (a = 2 AND b = 3) is `a IN (1, 2) AND b IN (2, 3)` + // otherwise, we can't infer a guarantee + let termsets: Vec> = disjunctions + .iter() + .map(|expr| { + split_conjunction(expr) + .into_iter() + .filter_map(ColOpLitOrInList::try_new) + .filter(|term| term.guarantee() == Guarantee::In) + .collect() + }) + .collect(); + + // Early return if any termset is empty (can't infer guarantees) + if termsets.iter().any(|terms| terms.is_empty()) { + return builder; + } + + // Find columns that appear in all termsets + let common_cols = find_common_columns(&termsets); + if common_cols.is_empty() { + return builder; + } + + // Build guarantees for common columns + let mut builder = builder; + for col in common_cols { + let literals: Vec<_> = termsets + .iter() + .filter_map(|terms| { + terms.iter().find(|term| term.col() == col).map( + |term| { + term.lits().into_iter().map(|lit| lit.value()) + }, + ) + }) + .flatten() + .collect(); + + builder = builder.aggregate_multi_conjunct( + col, + Guarantee::In, + literals.into_iter(), + ); + } + builder } } @@ -362,7 +384,7 @@ struct ColOpLit<'a> { } impl<'a> ColOpLit<'a> { - /// Returns Some(ColEqLit) if the expression is either: + /// Returns Some(ColOpLit) if the expression is either: /// 1. `col literal` /// 2. `literal col` /// 3. operator is `=` or `!=` @@ -410,6 +432,115 @@ impl<'a> ColOpLit<'a> { } } +/// Represents a single `col [not]in literal` expression +struct ColInList<'a> { + col: &'a crate::expressions::Column, + guarantee: Guarantee, + list: Vec<&'a crate::expressions::Literal>, +} + +impl<'a> ColInList<'a> { + /// Returns Some(ColInList) if the expression is either: + /// 1. `col (literal1, literal2, ...)` + /// 3. 
operator is `in` or `not in` + /// + /// Returns None otherwise + fn try_new(inlist: &'a crate::expressions::InListExpr) -> Option { + // Only support single-column inlist currently, multi-column inlist is not supported + let col = inlist + .expr() + .as_any() + .downcast_ref::()?; + + let literals = inlist + .list() + .iter() + .map(|e| e.as_any().downcast_ref::()) + .collect::>>()?; + + let guarantee = if inlist.negated() { + Guarantee::NotIn + } else { + Guarantee::In + }; + + Some(Self { + col, + guarantee, + list: literals, + }) + } +} + +/// Represents a single `col [not]in literal` expression or a single `col literal` expression +enum ColOpLitOrInList<'a> { + ColOpLit(ColOpLit<'a>), + ColInList(ColInList<'a>), +} + +impl<'a> ColOpLitOrInList<'a> { + fn try_new(expr: &'a Arc) -> Option { + match expr + .as_any() + .downcast_ref::() + { + Some(inlist) => Some(Self::ColInList(ColInList::try_new(inlist)?)), + None => ColOpLit::try_new(expr).map(Self::ColOpLit), + } + } + + fn guarantee(&self) -> Guarantee { + match self { + Self::ColOpLit(col_op_lit) => col_op_lit.guarantee, + Self::ColInList(col_in_list) => col_in_list.guarantee, + } + } + + fn col(&self) -> &'a crate::expressions::Column { + match self { + Self::ColOpLit(col_op_lit) => col_op_lit.col, + Self::ColInList(col_in_list) => col_in_list.col, + } + } + + fn lits(&self) -> Vec<&'a crate::expressions::Literal> { + match self { + Self::ColOpLit(col_op_lit) => vec![col_op_lit.lit], + Self::ColInList(col_in_list) => col_in_list.list.clone(), + } + } +} + +/// Find columns that appear in all termsets +fn find_common_columns<'a>( + termsets: &[Vec>], +) -> Vec<&'a crate::expressions::Column> { + if termsets.is_empty() { + return Vec::new(); + } + + // Start with columns from the first termset + let mut common_cols: HashSet<_> = termsets[0].iter().map(|term| term.col()).collect(); + + // check if any common_col in one termset occur many times + // e.g. 
(a = 1 AND a = 2) OR (a = 2 AND b = 3), should not infer a guarantee + // TODO: for above case, we can infer a IN (2) AND b IN (3) + if common_cols.len() != termsets[0].len() { + return Vec::new(); + } + + // Intersect with columns from remaining termsets + for termset in termsets.iter().skip(1) { + let termset_cols: HashSet<_> = termset.iter().map(|term| term.col()).collect(); + if termset_cols.len() != termset.len() { + return Vec::new(); + } + common_cols = common_cols.intersection(&termset_cols).cloned().collect(); + } + + common_cols.into_iter().collect() +} + #[cfg(test)] mod test { use std::sync::LazyLock; @@ -808,12 +939,11 @@ mod test { vec![not_in_guarantee("b", [1, 2, 3]), in_guarantee("b", [3, 4])], ); // b IN (1, 2, 3) OR b = 2 - // TODO this should be in_guarantee("b", [1, 2, 3]) but currently we don't support to analyze this kind of disjunction. Only `ColOpLit OR ColOpLit` is supported. test_analyze( col("b") .in_list(vec![lit(1), lit(2), lit(3)], false) .or(col("b").eq(lit(2))), - vec![], + vec![in_guarantee("b", [1, 2, 3])], ); // b IN (1, 2, 3) OR b != 3 test_analyze( @@ -824,13 +954,123 @@ mod test { ); } + #[test] + fn test_disjunction_and_conjunction_multi_column() { + // (a = "foo" AND b = 1) OR (a = "bar" AND b = 2) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("a").eq(lit("bar")).and(col("b").eq(lit(2)))), + vec![in_guarantee("a", ["foo", "bar"]), in_guarantee("b", [1, 2])], + ); + // (a = "foo" AND b = 1) OR (a = "bar" AND b = 2) OR (b = 3) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("a").eq(lit("bar")).and(col("b").eq(lit(2)))) + .or(col("b").eq(lit(3))), + vec![in_guarantee("b", [1, 2, 3])], + ); + // (a = "foo" AND b = 1) OR (a = "bar" AND b = 2) OR (c = 3) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("a").eq(lit("bar")).and(col("b").eq(lit(2)))) + .or(col("c").eq(lit(3))), + vec![], + ); + // (a = "foo" AND b > 1) OR (a = "bar" AND b = 
2) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").gt(lit(1)))) + .or(col("a").eq(lit("bar")).and(col("b").eq(lit(2)))), + vec![in_guarantee("a", ["foo", "bar"])], + ); + // (a = "foo" AND b = 1) OR (b = 1 AND c = 2) OR (c = 3 AND a = "bar") + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("b").eq(lit(1)).and(col("c").eq(lit(2)))) + .or(col("c").eq(lit(3)).and(col("a").eq(lit("bar")))), + vec![], + ); + // (a = "foo" AND a = "bar") OR (a = "good" AND b = 1) + // TODO: this should be `a IN ("good") AND b IN (1)` + test_analyze( + (col("a").eq(lit("foo")).and(col("a").eq(lit("bar")))) + .or(col("a").eq(lit("good")).and(col("b").eq(lit(1)))), + vec![], + ); + // (a = "foo" AND a = "foo") OR (a = "good" AND b = 1) + // TODO: this should be `a IN ("foo", "good")` + test_analyze( + (col("a").eq(lit("foo")).and(col("a").eq(lit("foo")))) + .or(col("a").eq(lit("good")).and(col("b").eq(lit(1)))), + vec![], + ); + // (a = "foo" AND b = 3) OR (b = 4 AND b = 1) OR (b = 2 AND a = "bar") + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(3)))) + .or(col("b").eq(lit(4)).and(col("b").eq(lit(1)))) + .or(col("b").eq(lit(2)).and(col("a").eq(lit("bar")))), + vec![], + ); + // (b = 1 AND b > 3) OR (a = "foo" AND b = 4) + test_analyze( + (col("b").eq(lit(1)).and(col("b").gt(lit(3)))) + .or(col("a").eq(lit("foo")).and(col("b").eq(lit(4)))), + // if b isn't 1 or 4, it can not be true (though the expression actually can never be true) + vec![in_guarantee("b", [1, 4])], + ); + // (a = "foo" AND b = 1) OR (a != "bar" AND b = 2) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("a").not_eq(lit("bar")).and(col("b").eq(lit(2)))), + vec![in_guarantee("b", [1, 2])], + ); + // (a = "foo" AND b = 1) OR (a LIKE "%bar" AND b = 2) + test_analyze( + (col("a").eq(lit("foo")).and(col("b").eq(lit(1)))) + .or(col("a").like(lit("%bar")).and(col("b").eq(lit(2)))), + vec![in_guarantee("b", [1, 2])], + ); + // (a IN ("foo", "bar") 
AND b = 5) OR (a IN ("foo", "bar") AND b = 6) + test_analyze( + (col("a") + .in_list(vec![lit("foo"), lit("bar")], false) + .and(col("b").eq(lit(5)))) + .or(col("a") + .in_list(vec![lit("foo"), lit("bar")], false) + .and(col("b").eq(lit(6)))), + vec![in_guarantee("a", ["foo", "bar"]), in_guarantee("b", [5, 6])], + ); + // (a IN ("foo", "bar") AND b = 5) OR (a IN ("foo") AND b = 6) + test_analyze( + (col("a") + .in_list(vec![lit("foo"), lit("bar")], false) + .and(col("b").eq(lit(5)))) + .or(col("a") + .in_list(vec![lit("foo")], false) + .and(col("b").eq(lit(6)))), + vec![in_guarantee("a", ["foo", "bar"]), in_guarantee("b", [5, 6])], + ); + // (a NOT IN ("foo", "bar") AND b = 5) OR (a NOT IN ("foo") AND b = 6) + test_analyze( + (col("a") + .in_list(vec![lit("foo"), lit("bar")], true) + .and(col("b").eq(lit(5)))) + .or(col("a") + .in_list(vec![lit("foo")], true) + .and(col("b").eq(lit(6)))), + vec![in_guarantee("b", [5, 6])], + ); + } + /// Tests that analyzing expr results in the expected guarantees fn test_analyze(expr: Expr, expected: Vec) { println!("Begin analyze of {expr}"); let schema = schema(); let physical_expr = logical2physical(&expr, &schema); - let actual = LiteralGuarantee::analyze(&physical_expr); + let actual = LiteralGuarantee::analyze(&physical_expr) + .into_iter() + .sorted_by_key(|g| g.column.name().to_string()) + .collect::>(); assert_eq!( expected, actual, "expr: {expr}\ @@ -867,6 +1107,7 @@ mod test { Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), ])) }); Arc::clone(&SCHEMA) diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 6f0e7c963d144..d7287c27de7eb 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use crate::aggregate::AggregateFunctionExpr; use 
crate::window::standard::add_new_ordering_expr_with_partition_by; -use crate::window::window_expr::AggregateWindowExpr; +use crate::window::window_expr::{AggregateWindowExpr, WindowFn}; use crate::window::{ PartitionBatches, PartitionWindowAggStates, SlidingAggregateWindowExpr, WindowExpr, }; @@ -211,6 +211,10 @@ impl WindowExpr for PlainAggregateWindowExpr { fn uses_bounded_memory(&self) -> bool { !self.window_frame.end_bound.is_unbounded() } + + fn create_window_fn(&self) -> Result { + Ok(WindowFn::Aggregate(self.get_accumulator()?)) + } } impl AggregateWindowExpr for PlainAggregateWindowExpr { diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index 33921a57a6ce0..cb105e773d2e8 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -22,7 +22,7 @@ use std::ops::Range; use std::sync::Arc; use crate::aggregate::AggregateFunctionExpr; -use crate::window::window_expr::AggregateWindowExpr; +use crate::window::window_expr::{AggregateWindowExpr, WindowFn}; use crate::window::{ PartitionBatches, PartitionWindowAggStates, PlainAggregateWindowExpr, WindowExpr, }; @@ -175,6 +175,10 @@ impl WindowExpr for SlidingAggregateWindowExpr { window_frame: Arc::clone(&self.window_frame), })) } + + fn create_window_fn(&self) -> Result { + Ok(WindowFn::Aggregate(self.get_accumulator()?)) + } } impl AggregateWindowExpr for SlidingAggregateWindowExpr { diff --git a/datafusion/physical-expr/src/window/standard.rs b/datafusion/physical-expr/src/window/standard.rs index c3761aa78f725..7b208ea41f173 100644 --- a/datafusion/physical-expr/src/window/standard.rs +++ b/datafusion/physical-expr/src/window/standard.rs @@ -275,6 +275,10 @@ impl WindowExpr for StandardWindowExpr { false } } + + fn create_window_fn(&self) -> Result { + Ok(WindowFn::Builtin(self.expr.create_evaluator()?)) + } } /// Adds a new ordering expression into existing 
ordering equivalence class(es) based on diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index dd671e0685717..ee39b5b2451eb 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -130,6 +130,12 @@ pub trait WindowExpr: Send + Sync + Debug { /// Get the reverse expression of this [WindowExpr]. fn get_reverse_expr(&self) -> Option>; + /// Creates a new instance of the window function evaluator. + /// + /// Returns `WindowFn::Builtin` for built-in window functions (e.g., ROW_NUMBER, RANK) + /// or `WindowFn::Aggregate` for aggregate window functions (e.g., SUM, AVG). + fn create_window_fn(&self) -> Result; + /// Returns all expressions used in the [`WindowExpr`]. /// These expressions are (1) function arguments, (2) partition by expressions, (3) order by expressions. fn all_expressions(&self) -> WindowPhysicalExpressions { diff --git a/datafusion/physical-optimizer/src/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs index 39eb557ea6012..88dcd4c523cf4 100644 --- a/datafusion/physical-optimizer/src/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -925,19 +925,20 @@ fn add_hash_on_top( Ok(input) } -/// Adds a [`SortPreservingMergeExec`] operator on top of input executor -/// to satisfy single distribution requirement. +/// Adds a [`SortPreservingMergeExec`] or a [`CoalescePartitionsExec`] operator +/// on top of the given plan node to satisfy a single partition requirement +/// while preserving ordering constraints. /// -/// # Arguments +/// # Parameters /// /// * `input`: Current node. /// /// # Returns /// -/// Updated node with an execution plan, where desired single -/// distribution is satisfied by adding [`SortPreservingMergeExec`]. 
-fn add_spm_on_top(input: DistributionContext) -> DistributionContext { - // Add SortPreservingMerge only when partition count is larger than 1. +/// Updated node with an execution plan, where the desired single distribution +/// requirement is satisfied. +fn add_merge_on_top(input: DistributionContext) -> DistributionContext { + // Apply only when the partition count is larger than one. if input.plan.output_partitioning().partition_count() > 1 { // When there is an existing ordering, we preserve ordering // when decreasing partitions. This will be un-done in the future @@ -945,12 +946,13 @@ fn add_spm_on_top(input: DistributionContext) -> DistributionContext { // - Preserving ordering is not helpful in terms of satisfying ordering requirements // - Usage of order preserving variants is not desirable // (determined by flag `config.optimizer.prefer_existing_sort`) - let new_plan = if let Some(ordering) = input.plan.output_ordering() { + let new_plan = if let Some(req) = input.plan.output_ordering() { Arc::new(SortPreservingMergeExec::new( - ordering.clone(), + req.clone(), Arc::clone(&input.plan), )) as _ } else { + // If there is no input order, we can simply coalesce partitions: Arc::new(CoalescePartitionsExec::new(Arc::clone(&input.plan))) as _ }; @@ -1259,7 +1261,7 @@ pub fn ensure_distribution( // Satisfy the distribution requirement if it is unmet. 
match &requirement { Distribution::SinglePartition => { - child = add_spm_on_top(child); + child = add_merge_on_top(child); } Distribution::HashPartitioned(exprs) => { if add_roundrobin { diff --git a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs index a9c0e4cb28589..6e4e784866129 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs @@ -35,6 +35,7 @@ use datafusion_physical_expr_common::sort_expr::{ LexOrdering, LexRequirement, OrderingRequirements, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ calculate_join_output_ordering, ColumnIndex, @@ -190,6 +191,7 @@ fn pushdown_sorts_helper( } else if let Some(adjusted) = pushdown_requirement_to_children( &sort_push_down.plan, parent_requirement.clone(), + parent_fetch, )? { // For operators that can take a sort pushdown, continue with updated // requirements: @@ -216,7 +218,41 @@ fn pushdown_sorts_helper( fn pushdown_requirement_to_children( plan: &Arc, parent_required: OrderingRequirements, + parent_fetch: Option, ) -> Result>>> { + // If there is a limit on the parent plan we cannot push it down through operators that change the cardinality. + // E.g. consider if LIMIT 2 is applied below a FilterExec that filters out 1/2 of the rows we'll end up with 1 row instead of 2. + // If the LIMIT is applied after the FilterExec and the FilterExec returns > 2 rows we'll end up with 2 rows (correct). + if parent_fetch.is_some() && !plan.supports_limit_pushdown() { + return Ok(None); + } + // Note: we still need to check the cardinality effect of the plan here, because the + // limit pushdown is not always safe, even if the plan supports it.
Here's an example: + // + // UnionExec advertises `supports_limit_pushdown() == true` because it can + // forward a LIMIT k to each of its children—i.e. apply “LIMIT k” separately + // on each branch before merging them together. + // + // However, UnionExec’s `cardinality_effect() == GreaterEqual` (it sums up + // all child row counts), so pushing a global TopK/LIMIT through it would + // break the semantics of “take the first k rows of the combined result.” + // + // For example, with two branches A and B and k = 3: + // — Global LIMIT: take the first 3 rows from (A ∪ B) after merging. + // — Pushed down: take 3 from A, 3 from B, then merge → up to 6 rows! + // + // That’s why we still block on cardinality: even though UnionExec can + // push a LIMIT to its children, its GreaterEqual effect means it cannot + // preserve the global TopK semantics. + if parent_fetch.is_some() { + match plan.cardinality_effect() { + CardinalityEffect::Equal => { + // safe: only true sources (e.g. CoalesceBatchesExec, ProjectionExec) pass + } + _ => return Ok(None), + } + } + let maintains_input_order = plan.maintains_input_order(); if is_window(plan) { let mut required_input_ordering = plan.required_input_ordering(); diff --git a/datafusion/physical-optimizer/src/filter_pushdown.rs b/datafusion/physical-optimizer/src/filter_pushdown.rs index 885280576b4b8..66ccc1a798537 100644 --- a/datafusion/physical-optimizer/src/filter_pushdown.rs +++ b/datafusion/physical-optimizer/src/filter_pushdown.rs @@ -15,6 +15,22 @@ // specific language governing permissions and limitations // under the License. +//! Filter Pushdown Optimization Process +//! +//! The filter pushdown mechanism involves four key steps: +//! 1. **Optimizer Asks Parent for a Filter Pushdown Plan**: The optimizer calls [`ExecutionPlan::gather_filters_for_pushdown`] +//! on the parent node, passing in parent predicates and phase. The parent node creates a [`FilterDescription`] +//! 
by inspecting its logic and children's schemas, determining which filters can be pushed to each child. +//! 2. **Optimizer Executes Pushdown**: The optimizer recursively calls `push_down_filters` in this module on each child, +//! passing the appropriate filters (`Vec>`) for that child. +//! 3. **Optimizer Gathers Results**: The optimizer collects [`FilterPushdownPropagation`] results from children, +//! containing information about which filters were successfully pushed down vs. unsupported. +//! 4. **Parent Responds**: The optimizer calls [`ExecutionPlan::handle_child_pushdown_result`] on the parent, +//! passing a [`ChildPushdownResult`] containing the aggregated pushdown outcomes. The parent decides +//! how to handle filters that couldn't be pushed down (e.g., keep them as FilterExec nodes). +//! +//! [`FilterDescription`]: datafusion_physical_plan::filter_pushdown::FilterDescription + use std::sync::Arc; use crate::PhysicalOptimizerRule; @@ -22,12 +38,12 @@ use crate::PhysicalOptimizerRule; use datafusion_common::{config::ConfigOptions, Result}; use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::filter_pushdown::{ - ChildPushdownResult, FilterPushdownPhase, FilterPushdownPropagation, - PredicateSupport, PredicateSupports, + ChildFilterPushdownResult, ChildPushdownResult, FilterPushdownPhase, + FilterPushdownPropagation, PushedDown, }; use datafusion_physical_plan::{with_new_children_if_necessary, ExecutionPlan}; -use itertools::izip; +use itertools::{izip, Itertools}; /// Attempts to recursively push given filters from the top of the tree into leafs. /// @@ -419,24 +435,14 @@ impl PhysicalOptimizerRule for FilterPushdown { } } -/// Support state of each predicate for the children of the node. -/// These predicates are coming from the parent node. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ParentPredicateStates { - NoChildren, - Unsupported, - Supported, -} - fn push_down_filters( node: Arc, parent_predicates: Vec>, config: &ConfigOptions, phase: FilterPushdownPhase, ) -> Result>> { - // If the node has any child, these will be rewritten as supported or unsupported - let mut parent_predicates_pushdown_states = - vec![ParentPredicateStates::NoChildren; parent_predicates.len()]; + let mut parent_filter_pushdown_supports: Vec> = + vec![vec![]; parent_predicates.len()]; let mut self_filters_pushdown_supports = vec![]; let mut new_children = Vec::with_capacity(node.children().len()); @@ -444,45 +450,66 @@ fn push_down_filters( let filter_description = node.gather_filters_for_pushdown(phase, parent_predicates.clone(), config)?; - for (child, parent_filters, self_filters) in izip!( + let filter_description_parent_filters = filter_description.parent_filters(); + let filter_description_self_filters = filter_description.self_filters(); + if filter_description_parent_filters.len() != children.len() { + return Err(datafusion_common::DataFusionError::Internal( + format!( + "Filter pushdown expected FilterDescription to have parent filters for {expected_num_children}, but got {actual_num_children} for node {node_name}", + expected_num_children = children.len(), + actual_num_children = filter_description_parent_filters.len(), + node_name = node.name(), + ), + )); + } + if filter_description_self_filters.len() != children.len() { + return Err(datafusion_common::DataFusionError::Internal( + format!( + "Filter pushdown expected FilterDescription to have self filters for {expected_num_children}, but got {actual_num_children} for node {node_name}", + expected_num_children = children.len(), + actual_num_children = filter_description_self_filters.len(), + node_name = node.name(), + ), + )); + } + + for (child_idx, (child, parent_filters, self_filters)) in izip!( children, filter_description.parent_filters(), 
filter_description.self_filters() - ) { + ) + .enumerate() + { // Here, `parent_filters` are the predicates which are provided by the parent node of // the current node, and tried to be pushed down over the child which the loop points // currently. `self_filters` are the predicates which are provided by the current node, // and tried to be pushed down over the child similarly. let num_self_filters = self_filters.len(); - let mut parent_supported_predicate_indices = vec![]; - let mut all_predicates = self_filters; + let mut all_predicates = self_filters.clone(); + + // Track which parent filters are supported for this child + let mut parent_filter_indices = vec![]; // Iterate over each predicate coming from the parent - for (idx, filter) in parent_filters.into_iter().enumerate() { + for (parent_filter_idx, filter) in parent_filters.into_iter().enumerate() { // Check if we can push this filter down to our child. // These supports are defined in `gather_filters_for_pushdown()` - match filter { - PredicateSupport::Supported(predicate) => { + match filter.discriminant { + PushedDown::Yes => { // Queue this filter up for pushdown to this child - all_predicates.push(predicate); - parent_supported_predicate_indices.push(idx); - // Mark this filter as supported by our children if no child has marked it as unsupported - if parent_predicates_pushdown_states[idx] - != ParentPredicateStates::Unsupported - { - parent_predicates_pushdown_states[idx] = - ParentPredicateStates::Supported; - } + all_predicates.push(filter.predicate); + parent_filter_indices.push(parent_filter_idx); } - PredicateSupport::Unsupported(_) => { - // Mark as unsupported by our children - parent_predicates_pushdown_states[idx] = - ParentPredicateStates::Unsupported; + PushedDown::No => { + // This filter won't be pushed down to this child + // Will be marked as unsupported later in the initialization loop } } } + let num_parent_filters = all_predicates.len() - num_self_filters; + // Any filters that could 
not be pushed down to a child are marked as not-supported to our parents let result = push_down_filters(Arc::clone(child), all_predicates, config, phase)?; @@ -497,64 +524,68 @@ fn push_down_filters( // Our child doesn't know the difference between filters that were passed down // from our parents and filters that the current node injected. We need to de-entangle // this since we do need to distinguish between them. - let mut all_filters = result.filters.into_inner(); - let parent_predicates = all_filters.split_off(num_self_filters); - let self_predicates = all_filters; - self_filters_pushdown_supports.push(PredicateSupports::new(self_predicates)); + let mut all_filters = result.filters.into_iter().collect_vec(); + if all_filters.len() != num_self_filters + num_parent_filters { + return Err(datafusion_common::DataFusionError::Internal( + format!( + "Filter pushdown did not return the expected number of filters: expected {num_self_filters} self filters and {num_parent_filters} parent filters, but got {num_filters_from_child}. 
Likely culprit is {child}", + num_self_filters = num_self_filters, + num_parent_filters = num_parent_filters, + num_filters_from_child = all_filters.len(), + child = child.name(), + ), + )); + } + let parent_filters = all_filters + .split_off(num_self_filters) + .into_iter() + .collect_vec(); + self_filters_pushdown_supports.push( + all_filters + .into_iter() + .zip(self_filters) + .map(|(s, f)| s.wrap_expression(f)) + .collect(), + ); - for (idx, result) in parent_supported_predicate_indices - .iter() - .zip(parent_predicates) + // Start by marking all parent filters as unsupported for this child + for parent_filter_pushdown_support in parent_filter_pushdown_supports.iter_mut() { + parent_filter_pushdown_support.push(PushedDown::No); + assert_eq!( + parent_filter_pushdown_support.len(), + child_idx + 1, + "Parent filter pushdown supports should have the same length as the number of children" + ); + } + // Map results from pushed-down filters back to original parent filter indices + for (result_idx, parent_filter_support) in parent_filters.into_iter().enumerate() { - let current_node_state = match result { - PredicateSupport::Supported(_) => ParentPredicateStates::Supported, - PredicateSupport::Unsupported(_) => ParentPredicateStates::Unsupported, - }; - match (current_node_state, parent_predicates_pushdown_states[*idx]) { - (r, ParentPredicateStates::NoChildren) => { - // If we have no result, use the current state from this child - parent_predicates_pushdown_states[*idx] = r; - } - (ParentPredicateStates::Supported, ParentPredicateStates::Supported) => { - // If the current child and all previous children are supported, - // the filter continues to support it - parent_predicates_pushdown_states[*idx] = - ParentPredicateStates::Supported; - } - _ => { - // Either the current child or a previous child marked this filter as unsupported - parent_predicates_pushdown_states[*idx] = - ParentPredicateStates::Unsupported; - } - } + let original_parent_idx = 
parent_filter_indices[result_idx]; + parent_filter_pushdown_supports[original_parent_idx][child_idx] = + parent_filter_support; } } + // Re-create this node with new children let updated_node = with_new_children_if_necessary(Arc::clone(&node), new_children)?; - // Remap the result onto the parent filters as they were given to us. - // Any filters that were not pushed down to any children are marked as unsupported. - let parent_pushdown_result = PredicateSupports::new( - parent_predicates_pushdown_states - .into_iter() - .zip(parent_predicates) - .map(|(state, filter)| match state { - ParentPredicateStates::NoChildren => { - PredicateSupport::Unsupported(filter) - } - ParentPredicateStates::Unsupported => { - PredicateSupport::Unsupported(filter) - } - ParentPredicateStates::Supported => PredicateSupport::Supported(filter), - }) - .collect(), - ); + // TODO: by calling `handle_child_pushdown_result` we are assuming that the // `ExecutionPlan` implementation will not change the plan itself. // Should we have a separate method for dynamic pushdown that does not allow modifying the plan? 
let mut res = updated_node.handle_child_pushdown_result( phase, ChildPushdownResult { - parent_filters: parent_pushdown_result, + parent_filters: parent_predicates + .into_iter() + .enumerate() + .map( + |(parent_filter_idx, parent_filter)| ChildFilterPushdownResult { + filter: parent_filter, + child_results: parent_filter_pushdown_supports[parent_filter_idx] + .clone(), + }, + ) + .collect(), self_filters: self_filters_pushdown_supports, }, config, diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index 044d27811be67..d8ff2914dc3b1 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -138,10 +138,35 @@ impl DisplayAs for OutputRequirementExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "OutputRequirementExec") + let order_cols = self + .order_requirement + .as_ref() + .map(|reqs| reqs.first()) + .map(|lex| { + let pairs: Vec = lex + .iter() + .map(|req| { + let direction = req + .options + .as_ref() + .map( + |opt| if opt.descending { "desc" } else { "asc" }, + ) + .unwrap_or("unspecified"); + format!("({}, {direction})", req.expr) + }) + .collect(); + format!("[{}]", pairs.join(", ")) + }) + .unwrap_or_else(|| "[]".to_string()); + + write!( + f, + "OutputRequirementExec: order_by={}, dist_by={}", + order_cols, self.dist_requirement + ) } DisplayFormatType::TreeRender => { - // TODO: collect info write!(f, "") } } diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 095ee78cd0d63..97b1cff77739b 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -92,3 +92,7 @@ name = "spill_io" [[bench]] harness = false name = "sort_preserving_merge" + +[[bench]] +harness = false +name = "aggregate_vectorized" diff --git 
a/datafusion/physical-plan/benches/aggregate_vectorized.rs b/datafusion/physical-plan/benches/aggregate_vectorized.rs new file mode 100644 index 0000000000000..13a408b2da9ec --- /dev/null +++ b/datafusion/physical-plan/benches/aggregate_vectorized.rs @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow::array::ArrayRef; +use arrow::datatypes::StringViewType; +use arrow::util::bench_util::{ + create_string_view_array_with_len, create_string_view_array_with_max_len, +}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_physical_plan::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; +use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn; +use std::sync::Arc; + +const SIZES: [usize; 3] = [1_000, 10_000, 100_000]; +const NULL_DENSITIES: [f32; 3] = [0.0, 0.1, 0.5]; + +fn bench_vectorized_append(c: &mut Criterion) { + let mut group = c.benchmark_group("ByteViewGroupValueBuilder_vectorized_append"); + + for &size in &SIZES { + let rows: Vec = (0..size).collect(); + + for &null_density in &NULL_DENSITIES { + let input = create_string_view_array_with_len(size, null_density, 8, false); + let input: ArrayRef = Arc::new(input); + + // vectorized_append + let id = BenchmarkId::new( + format!("inlined_null_{null_density:.1}_size_{size}"), + "vectorized_append", + ); + group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + }); + }); + + // append_val + let id = BenchmarkId::new( + format!("inlined_null_{null_density:.1}_size_{size}"), + "append_val", + ); + group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + for &i in &rows { + builder.append_val(&input, i).unwrap(); + } + }); + }); + + // vectorized_equal_to + let id = BenchmarkId::new( + format!("inlined_null_{null_density:.1}_size_{size}"), + "vectorized_equal_to", + ); + group.bench_function(id, |b| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + let mut results = vec![true; size]; + b.iter(|| { + builder.vectorized_equal_to(&rows, &input, &rows, &mut results); + }); + }); + } + } + + for &size in 
&SIZES { + let rows: Vec = (0..size).collect(); + + for &null_density in &NULL_DENSITIES { + let scenario = "mixed"; + let input = create_string_view_array_with_len(size, null_density, 64, true); + let input: ArrayRef = Arc::new(input); + + // vectorized_append + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "vectorized_append", + ); + group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + }); + }); + + // append_val + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "append_val", + ); + group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + for &i in &rows { + builder.append_val(&input, i).unwrap(); + } + }); + }); + + // vectorized_equal_to + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "vectorized_equal_to", + ); + group.bench_function(id, |b| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + let mut results = vec![true; size]; + b.iter(|| { + builder.vectorized_equal_to(&rows, &input, &rows, &mut results); + }); + }); + } + } + + for &size in &SIZES { + let rows: Vec = (0..size).collect(); + + for &null_density in &NULL_DENSITIES { + let scenario = "random"; + let input = create_string_view_array_with_max_len(size, null_density, 400); + let input: ArrayRef = Arc::new(input); + + // vectorized_append + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "vectorized_append", + ); + group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + }); + }); + + // append_val + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "append_val", + ); + 
group.bench_function(id, |b| { + b.iter(|| { + let mut builder = ByteViewGroupValueBuilder::::new(); + for &i in &rows { + builder.append_val(&input, i).unwrap(); + } + }); + }); + + // vectorized_equal_to + let id = BenchmarkId::new( + format!("{scenario}_null_{null_density:.1}_size_{size}"), + "vectorized_equal_to", + ); + group.bench_function(id, |b| { + let mut builder = ByteViewGroupValueBuilder::::new(); + builder.vectorized_append(&input, &rows).unwrap(); + let mut results = vec![true; size]; + b.iter(|| { + builder.vectorized_equal_to(&rows, &input, &rows, &mut results); + }); + }); + } + } + + group.finish(); +} + +criterion_group!(benches, bench_vectorized_append); +criterion_main!(benches); diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index ce56ca4f7dfd7..f2f489b7223c3 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -28,7 +28,7 @@ use datafusion_common::Result; use datafusion_expr::EmitTo; -pub(crate) mod multi_group_by; +pub mod multi_group_by; mod row; mod single_group_by; @@ -84,7 +84,7 @@ mod null_builder; /// Each distinct group in a hash aggregation is identified by a unique group id /// (usize) which is assigned by instances of this trait. Group ids are /// continuous without gaps, starting from 0. -pub(crate) trait GroupValues: Send { +pub trait GroupValues: Send { /// Calculates the group id for each input row of `cols`, assigning new /// group ids as necessary. /// @@ -121,13 +121,15 @@ pub(crate) trait GroupValues: Send { /// will be chosen. /// /// - If group by multiple columns, and all column types have the specific -/// [`GroupColumn`] implementations, [`GroupValuesColumn`] will be chosen. +/// `GroupColumn` implementations, `GroupValuesColumn` will be chosen. /// -/// - Otherwise, the general implementation [`GroupValuesRows`] will be chosen. 
+/// - Otherwise, the general implementation `GroupValuesRows` will be chosen. /// -/// [`GroupColumn`]: crate::aggregates::group_values::multi_group_by::GroupColumn +/// `GroupColumn`: crate::aggregates::group_values::multi_group_by::GroupColumn +/// `GroupValuesColumn`: crate::aggregates::group_values::multi_group_by::GroupValuesColumn +/// `GroupValuesRows`: crate::aggregates::group_values::row::GroupValuesRows /// -pub(crate) fn new_group_values( +pub fn new_group_values( schema: SchemaRef, group_ordering: &GroupOrdering, ) -> Result> { diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index 63018874a1e40..599268baec67b 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -71,6 +71,12 @@ pub struct ByteViewGroupValueBuilder { _phantom: PhantomData, } +impl Default for ByteViewGroupValueBuilder { + fn default() -> Self { + Self::new() + } +} + impl ByteViewGroupValueBuilder { pub fn new() -> Self { Self { diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 2ac0389454dec..722bc6049c80b 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -18,7 +18,7 @@ //! 
`GroupValues` implementations for multi group by cases mod bytes; -mod bytes_view; +pub mod bytes_view; mod primitive; use std::mem::{self, size_of}; @@ -91,6 +91,11 @@ pub trait GroupColumn: Send + Sync { /// Returns the number of rows stored in this builder fn len(&self) -> usize; + /// true if len == 0 + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Returns the number of bytes used by this [`GroupColumn`] fn size(&self) -> usize; diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs index 9686b8c3521d2..21078ceb8aeda 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs @@ -15,11 +15,14 @@ // specific language governing permissions and limitations // under the License. +use std::mem::size_of; + use crate::aggregates::group_values::GroupValues; + use arrow::array::{Array, ArrayRef, OffsetSizeTrait, RecordBatch}; +use datafusion_common::Result; use datafusion_expr::EmitTo; use datafusion_physical_expr_common::binary_map::{ArrowBytesMap, OutputType}; -use std::mem::size_of; /// A [`GroupValues`] storing single column of Utf8/LargeUtf8/Binary/LargeBinary values /// @@ -42,11 +45,7 @@ impl GroupValuesByes { } impl GroupValues for GroupValuesByes { - fn intern( - &mut self, - cols: &[ArrayRef], - groups: &mut Vec, - ) -> datafusion_common::Result<()> { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { assert_eq!(cols.len(), 1); // look up / add entries in the table @@ -85,7 +84,7 @@ impl GroupValues for GroupValuesByes { self.num_groups } - fn emit(&mut self, emit_to: EmitTo) -> datafusion_common::Result> { + fn emit(&mut self, emit_to: EmitTo) -> Result> { // Reset the map to default, and convert it into a single array let map_contents = self.map.take().into_state(); diff --git 
a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 14b2d0a932c2a..784b7db893c05 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -54,7 +54,7 @@ use datafusion_physical_expr_common::sort_expr::{ use itertools::Itertools; -pub(crate) mod group_values; +pub mod group_values; mod no_grouping; pub mod order; mod row_hash; @@ -268,7 +268,7 @@ impl PhysicalGroupBy { } /// Returns the number expression as grouping keys. - fn num_group_exprs(&self) -> usize { + pub fn num_group_exprs(&self) -> usize { if self.is_single() { self.expr.len() } else { @@ -332,10 +332,17 @@ impl PhysicalGroupBy { ) .collect(); let num_exprs = expr.len(); + let groups = if self.expr.is_empty() { + // No GROUP BY expressions - should have no groups + vec![] + } else { + // Has GROUP BY expressions - create a single group + vec![vec![false; num_exprs]] + }; Self { expr, null_expr: vec![], - groups: vec![vec![false; num_exprs]], + groups, } } } @@ -1318,7 +1325,7 @@ fn evaluate( } /// Evaluates expressions against a record batch. 
-pub(crate) fn evaluate_many( +pub fn evaluate_many( expr: &[Vec>], batch: &RecordBatch, ) -> Result>> { @@ -1372,7 +1379,7 @@ fn group_id_array(group: &[bool], batch: &RecordBatch) -> Result { /// The outer Vec appears to be for grouping sets /// The inner Vec contains the results per expression /// The inner-inner Array contains the results per row -pub(crate) fn evaluate_group_by( +pub fn evaluate_group_by( group_by: &PhysicalGroupBy, batch: &RecordBatch, ) -> Result>> { diff --git a/datafusion/physical-plan/src/async_func.rs b/datafusion/physical-plan/src/async_func.rs index 7e9ae827d5d19..dd0fe3f6ecbd1 100644 --- a/datafusion/physical-plan/src/async_func.rs +++ b/datafusion/physical-plan/src/async_func.rs @@ -176,22 +176,23 @@ impl ExecutionPlan for AsyncFuncExec { // now, for each record batch, evaluate the async expressions and add the columns to the result let async_exprs_captured = Arc::new(self.async_exprs.clone()); let schema_captured = self.schema(); - let config_option_ref = Arc::new(context.session_config().options().clone()); + let config_options_ref = Arc::new(context.session_config().options().clone()); let stream_with_async_functions = input_stream.then(move |batch| { // need to clone *again* to capture the async_exprs and schema in the // stream and satisfy lifetime requirements. 
let async_exprs_captured = Arc::clone(&async_exprs_captured); let schema_captured = Arc::clone(&schema_captured); - let config_option = Arc::clone(&config_option_ref); + let config_options = Arc::clone(&config_options_ref); async move { let batch = batch?; // append the result of evaluating the async expressions to the output let mut output_arrays = batch.columns().to_vec(); for async_expr in async_exprs_captured.iter() { - let output = - async_expr.invoke_with_args(&batch, &config_option).await?; + let output = async_expr + .invoke_with_args(&batch, Arc::clone(&config_options)) + .await?; output_arrays.push(output.to_array(batch.num_rows())?); } let batch = RecordBatch::try_new(schema_captured, output_arrays)?; diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 78bd4b4fc3a0b..d98530d28e918 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -234,8 +234,7 @@ impl ExecutionPlan for CoalesceBatchesExec { parent_filters: Vec>, _config: &ConfigOptions, ) -> Result { - Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters)) + FilterDescription::from_children(parent_filters, &self.children()) } fn handle_child_pushdown_result( @@ -244,9 +243,7 @@ impl ExecutionPlan for CoalesceBatchesExec { child_pushdown_result: ChildPushdownResult, _config: &ConfigOptions, ) -> Result>> { - Ok(FilterPushdownPropagation::transparent( - child_pushdown_result, - )) + Ok(FilterPushdownPropagation::if_all(child_pushdown_result)) } } diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 56335f13d01bf..1cad0ee85c0da 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -120,6 +120,8 @@ pub struct DisplayableExecutionPlan<'a> { show_statistics: bool, /// If schema should be displayed. 
See [`Self::set_show_schema`] show_schema: bool, + // (TreeRender) Maximum total width of the rendered tree + tree_maximum_render_width: usize, } impl<'a> DisplayableExecutionPlan<'a> { @@ -131,6 +133,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::None, show_statistics: false, show_schema: false, + tree_maximum_render_width: 240, } } @@ -143,6 +146,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Aggregated, show_statistics: false, show_schema: false, + tree_maximum_render_width: 240, } } @@ -155,6 +159,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Full, show_statistics: false, show_schema: false, + tree_maximum_render_width: 240, } } @@ -173,6 +178,12 @@ impl<'a> DisplayableExecutionPlan<'a> { self } + /// Set the maximum render width for the tree format + pub fn set_tree_maximum_render_width(mut self, width: usize) -> Self { + self.tree_maximum_render_width = width; + self + } + /// Return a `format`able structure that produces a single line /// per node. 
/// @@ -270,14 +281,21 @@ impl<'a> DisplayableExecutionPlan<'a> { pub fn tree_render(&self) -> impl fmt::Display + 'a { struct Wrapper<'a> { plan: &'a dyn ExecutionPlan, + maximum_render_width: usize, } impl fmt::Display for Wrapper<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let mut visitor = TreeRenderVisitor { f }; + let mut visitor = TreeRenderVisitor { + f, + maximum_render_width: self.maximum_render_width, + }; visitor.visit(self.plan) } } - Wrapper { plan: self.inner } + Wrapper { + plan: self.inner, + maximum_render_width: self.tree_maximum_render_width, + } } /// Return a single-line summary of the root of the plan @@ -540,6 +558,8 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { struct TreeRenderVisitor<'a, 'b> { /// Write to this formatter f: &'a mut Formatter<'b>, + /// Maximum total width of the rendered tree + maximum_render_width: usize, } impl TreeRenderVisitor<'_, '_> { @@ -557,7 +577,6 @@ impl TreeRenderVisitor<'_, '_> { const HORIZONTAL: &'static str = "─"; // Horizontal line // TODO: Make these variables configurable. - const MAXIMUM_RENDER_WIDTH: usize = 240; // Maximum total width of the rendered tree const NODE_RENDER_WIDTH: usize = 29; // Width of each node's box const MAX_EXTRA_LINES: usize = 30; // Maximum number of extra info lines per node @@ -592,6 +611,12 @@ impl TreeRenderVisitor<'_, '_> { y: usize, ) -> Result<(), fmt::Error> { for x in 0..root.width { + if self.maximum_render_width > 0 + && x * Self::NODE_RENDER_WIDTH >= self.maximum_render_width + { + break; + } + if root.has_node(x, y) { write!(self.f, "{}", Self::LTCORNER)?; write!( @@ -662,7 +687,9 @@ impl TreeRenderVisitor<'_, '_> { // Render the actual node. 
for render_y in 0..=extra_height { for (x, _) in root.nodes.iter().enumerate().take(root.width) { - if x * Self::NODE_RENDER_WIDTH >= Self::MAXIMUM_RENDER_WIDTH { + if self.maximum_render_width > 0 + && x * Self::NODE_RENDER_WIDTH >= self.maximum_render_width + { break; } @@ -780,7 +807,9 @@ impl TreeRenderVisitor<'_, '_> { y: usize, ) -> Result<(), fmt::Error> { for x in 0..=root.width { - if x * Self::NODE_RENDER_WIDTH >= Self::MAXIMUM_RENDER_WIDTH { + if self.maximum_render_width > 0 + && x * Self::NODE_RENDER_WIDTH >= self.maximum_render_width + { break; } let mut has_adjacent_nodes = false; diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 90385c58a6ac2..6d51bf195dc6f 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -17,8 +17,8 @@ pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; use crate::filter_pushdown::{ - ChildPushdownResult, FilterDescription, FilterPushdownPhase, - FilterPushdownPropagation, + ChildFilterDescription, ChildPushdownResult, FilterDescription, FilterPushdownPhase, + FilterPushdownPropagation, PushedDownPredicate, }; pub use crate::metrics::Metric; pub use crate::ordering::InputOrderMode; @@ -33,6 +33,7 @@ pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ expressions, Distribution, Partitioning, PhysicalExpr, }; +use itertools::Itertools; use std::any::Any; use std::fmt::Debug; @@ -520,10 +521,19 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { parent_filters: Vec>, _config: &ConfigOptions, ) -> Result { - Ok( - FilterDescription::new_with_child_count(self.children().len()) - .all_parent_filters_unsupported(parent_filters), - ) + // Default implementation: mark all filters as unsupported for all children + let mut desc = FilterDescription::new(); + let child_filters = parent_filters + .iter() + .map(|f| 
PushedDownPredicate::unsupported(Arc::clone(f))) + .collect_vec(); + for _ in 0..self.children().len() { + desc = desc.with_child(ChildFilterDescription { + parent_filters: child_filters.clone(), + self_filters: vec![], + }); + } + Ok(desc) } /// Handle the result of a child pushdown. @@ -553,7 +563,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// they have been handled. /// - A `HashJoinExec` might ignore the pushdown result if filters need to /// be applied during the join operation. It passes the parent filters back - /// up wrapped in [`FilterPushdownPropagation::transparent`], discarding + /// up wrapped in [`FilterPushdownPropagation::if_any`], discarding /// any self-filters from children. /// /// **Example Walkthrough:** @@ -587,16 +597,16 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// /// **Helper Methods for Customization:** /// There are various helper methods to simplify implementing this method: - /// - [`FilterPushdownPropagation::unsupported`]: Indicates that the node - /// does not support filter pushdown at all, rejecting all filters. - /// - [`FilterPushdownPropagation::transparent`]: Indicates that the node - /// supports filter pushdown but does not modify it, simply transmitting - /// the children's pushdown results back up to its parent. - /// - [`PredicateSupports::new_with_supported_check`]: Takes a callback to - /// dynamically determine support for each filter, useful with - /// [`FilterPushdownPropagation::with_filters`] and - /// [`FilterPushdownPropagation::with_updated_node`] to build mixed results - /// of supported and unsupported filters. + /// - [`FilterPushdownPropagation::if_any`]: Marks all parent filters as + /// supported as long as at least one child supports them. + /// - [`FilterPushdownPropagation::if_all`]: Marks all parent filters as + /// supported as long as all children support them. 
+ /// - [`FilterPushdownPropagation::with_parent_pushdown_result`]: Allows adding filters + /// to the propagation result, indicating which filters are supported by + /// the current node. + /// - [`FilterPushdownPropagation::with_updated_node`]: Allows updating the + /// current node in the propagation result, used if the node + /// has modified its plan based on the pushdown results. /// /// **Filter Pushdown Phases:** /// There are two different phases in filter pushdown (`Pre` and others), @@ -604,17 +614,14 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// operator may or may not be allowed to modify the plan. See /// [`FilterPushdownPhase`] for more details on phase-specific behavior. /// - /// [`PredicateSupport::Supported`]: crate::filter_pushdown::PredicateSupport::Supported - /// [`PredicateSupports::new_with_supported_check`]: crate::filter_pushdown::PredicateSupports::new_with_supported_check + /// [`PushedDownPredicate::supported`]: crate::filter_pushdown::PushedDownPredicate::supported fn handle_child_pushdown_result( &self, _phase: FilterPushdownPhase, child_pushdown_result: ChildPushdownResult, _config: &ConfigOptions, ) -> Result>> { - Ok(FilterPushdownPropagation::transparent( - child_pushdown_result, - )) + Ok(FilterPushdownPropagation::if_all(child_pushdown_result)) } /// Injects arbitrary run-time state into this execution plan, returning a new plan diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 252af9ebcd496..8157e1b721a68 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -16,11 +16,12 @@ // under the License. 
use std::any::Any; -use std::collections::HashMap; use std::pin::Pin; use std::sync::Arc; use std::task::{ready, Context, Poll}; +use itertools::Itertools; + use super::{ ColumnStatistics, DisplayAs, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -28,8 +29,8 @@ use super::{ use crate::common::can_project; use crate::execution_plan::CardinalityEffect; use crate::filter_pushdown::{ - ChildPushdownResult, FilterDescription, FilterPushdownPhase, - FilterPushdownPropagation, + ChildFilterDescription, ChildPushdownResult, FilterDescription, FilterPushdownPhase, + FilterPushdownPropagation, PushedDown, PushedDownPredicate, }; use crate::projection::{ make_with_child, try_embed_projection, update_expr, EmbeddedProjection, @@ -46,9 +47,6 @@ use arrow::record_batch::RecordBatch; use datafusion_common::cast::as_boolean_array; use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; -use datafusion_common::tree_node::{ - Transformed, TransformedResult, TreeNode, TreeNodeRecursion, -}; use datafusion_common::{ internal_err, plan_err, project_schema, DataFusionError, Result, ScalarValue, }; @@ -65,7 +63,6 @@ use datafusion_physical_expr::{ use datafusion_physical_expr_common::physical_expr::fmt_sql; use futures::stream::{Stream, StreamExt}; -use itertools::Itertools; use log::trace; const FILTER_EXEC_DEFAULT_SELECTIVITY: u8 = 20; @@ -455,56 +452,26 @@ impl ExecutionPlan for FilterExec { _config: &ConfigOptions, ) -> Result { if !matches!(phase, FilterPushdownPhase::Pre) { - return Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters)); + // For non-pre phase, filters pass through unchanged + let filter_supports = parent_filters + .into_iter() + .map(PushedDownPredicate::supported) + .collect(); + return Ok(FilterDescription::new().with_child(ChildFilterDescription { + parent_filters: filter_supports, + self_filters: vec![], + })); } - let self_filter = 
split_conjunction(&self.predicate) - .into_iter() - .cloned() - .collect_vec(); - let parent_filters = if let Some(projection_indices) = self.projection.as_ref() { - // We need to invert the projection on any referenced columns in the filter - // Create a mapping from the output columns to the input columns (the inverse of the projection) - let inverse_projection = projection_indices - .iter() - .enumerate() - .map(|(i, &p)| (p, i)) - .collect::>(); - parent_filters - .into_iter() - .map(|f| { - f.transform_up(|expr| { - let mut res = - if let Some(col) = expr.as_any().downcast_ref::() { - let index = col.index(); - let index_in_input_schema = - inverse_projection.get(&index).ok_or_else(|| { - DataFusionError::Internal(format!( - "Column {index} not found in projection" - )) - })?; - Transformed::yes(Arc::new(Column::new( - col.name(), - *index_in_input_schema, - )) as _) - } else { - Transformed::no(expr) - }; - // Columns can only exist in the leaves, no need to try all nodes - res.tnr = TreeNodeRecursion::Jump; - Ok(res) - }) - .data() - }) - .collect::>>()? - } else { - parent_filters - }; + let child = ChildFilterDescription::from_child(&parent_filters, self.input())? 
+ .with_self_filters( + split_conjunction(&self.predicate) + .into_iter() + .cloned() + .collect(), + ); - Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters) - .with_self_filters_for_children(vec![self_filter])) + Ok(FilterDescription::new().with_child(child)) } fn handle_child_pushdown_result( @@ -514,21 +481,28 @@ impl ExecutionPlan for FilterExec { _config: &ConfigOptions, ) -> Result>> { if !matches!(phase, FilterPushdownPhase::Pre) { - return Ok(FilterPushdownPropagation::transparent( - child_pushdown_result, - )); + return Ok(FilterPushdownPropagation::if_all(child_pushdown_result)); } // We absorb any parent filters that were not handled by our children - let mut unhandled_filters = - child_pushdown_result.parent_filters.collect_unsupported(); - assert_eq!( - child_pushdown_result.self_filters.len(), - 1, - "FilterExec should only have one child" - ); - let unsupported_self_filters = - child_pushdown_result.self_filters[0].collect_unsupported(); - unhandled_filters.extend(unsupported_self_filters); + let unsupported_parent_filters = + child_pushdown_result.parent_filters.iter().filter_map(|f| { + matches!(f.all(), PushedDown::No).then_some(Arc::clone(&f.filter)) + }); + let unsupported_self_filters = child_pushdown_result + .self_filters + .first() + .expect("we have exactly one child") + .iter() + .filter_map(|f| match f.discriminant { + PushedDown::Yes => None, + PushedDown::No => Some(&f.predicate), + }) + .cloned(); + + let unhandled_filters = unsupported_parent_filters + .into_iter() + .chain(unsupported_self_filters) + .collect_vec(); // If we have unhandled filters, we need to create a new FilterExec let filter_input = Arc::clone(self.input()); @@ -577,8 +551,9 @@ impl ExecutionPlan for FilterExec { }; Some(Arc::new(new) as _) }; + Ok(FilterPushdownPropagation { - filters: child_pushdown_result.parent_filters.make_supported(), + filters: vec![PushedDown::Yes; child_pushdown_result.parent_filters.len()], 
updated_node, }) } @@ -741,7 +716,9 @@ impl RecordBatchStream for FilterExecStream { } /// Return the equals Column-Pairs and Non-equals Column-Pairs -fn collect_columns_from_predicate(predicate: &Arc) -> EqualAndNonEqual { +pub fn collect_columns_from_predicate( + predicate: &Arc, +) -> EqualAndNonEqual { let mut eq_predicate_columns = Vec::::new(); let mut ne_predicate_columns = Vec::::new(); diff --git a/datafusion/physical-plan/src/filter_pushdown.rs b/datafusion/physical-plan/src/filter_pushdown.rs index 725abd7fc8b5d..a3e94a75c8e77 100644 --- a/datafusion/physical-plan/src/filter_pushdown.rs +++ b/datafusion/physical-plan/src/filter_pushdown.rs @@ -15,9 +15,30 @@ // specific language governing permissions and limitations // under the License. +//! Filter Pushdown Optimization Process +//! +//! The filter pushdown mechanism involves four key steps: +//! 1. **Optimizer Asks Parent for a Filter Pushdown Plan**: The optimizer calls [`ExecutionPlan::gather_filters_for_pushdown`] +//! on the parent node, passing in parent predicates and phase. The parent node creates a [`FilterDescription`] +//! by inspecting its logic and children's schemas, determining which filters can be pushed to each child. +//! 2. **Optimizer Executes Pushdown**: The optimizer recursively pushes down filters for each child, +//! passing the appropriate filters (`Vec>`) for that child. +//! 3. **Optimizer Gathers Results**: The optimizer collects [`FilterPushdownPropagation`] results from children, +//! containing information about which filters were successfully pushed down vs. unsupported. +//! 4. **Parent Responds**: The optimizer calls [`ExecutionPlan::handle_child_pushdown_result`] on the parent, +//! passing a [`ChildPushdownResult`] containing the aggregated pushdown outcomes. The parent decides +//! how to handle filters that couldn't be pushed down (e.g., keep them as FilterExec nodes). +//! +//! 
[`ExecutionPlan::gather_filters_for_pushdown`]: crate::ExecutionPlan::gather_filters_for_pushdown +//! [`ExecutionPlan::handle_child_pushdown_result`]: crate::ExecutionPlan::handle_child_pushdown_result +//! +//! See also datafusion/physical-optimizer/src/filter_pushdown.rs. + +use std::collections::HashSet; use std::sync::Arc; -use std::vec::IntoIter; +use datafusion_common::Result; +use datafusion_physical_expr::utils::{collect_columns, reassign_predicate_columns}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; #[derive(Debug, Clone, Copy)] @@ -65,182 +86,111 @@ impl std::fmt::Display for FilterPushdownPhase { /// The result of a plan for pushing down a filter into a child node. /// This contains references to filters so that nodes can mutate a filter /// before pushing it down to a child node (e.g. to adjust a projection) -/// or can directly take ownership of `Unsupported` filters that their children +/// or can directly take ownership of filters that their children /// could not handle. #[derive(Debug, Clone)] -pub enum PredicateSupport { - Supported(Arc), - Unsupported(Arc), +pub struct PushedDownPredicate { + pub discriminant: PushedDown, + pub predicate: Arc, } -impl PredicateSupport { +impl PushedDownPredicate { + /// Return the wrapped [`PhysicalExpr`], discarding whether it is supported or unsupported. pub fn into_inner(self) -> Arc { - match self { - PredicateSupport::Supported(expr) | PredicateSupport::Unsupported(expr) => { - expr - } - } - } -} - -/// A thin wrapper around [`PredicateSupport`]s that allows for easy collection of -/// supported and unsupported filters. Inner vector stores each predicate for one node. -#[derive(Debug, Clone)] -pub struct PredicateSupports(Vec); - -impl PredicateSupports { - /// Create a new FilterPushdowns with the given filters and their pushdown status. - pub fn new(pushdowns: Vec) -> Self { - Self(pushdowns) - } - - /// Create a new [`PredicateSupport`] with all filters as supported. 
- pub fn all_supported(filters: Vec>) -> Self { - let pushdowns = filters - .into_iter() - .map(PredicateSupport::Supported) - .collect(); - Self::new(pushdowns) - } - - /// Create a new [`PredicateSupport`] with all filters as unsupported. - pub fn all_unsupported(filters: Vec>) -> Self { - let pushdowns = filters - .into_iter() - .map(PredicateSupport::Unsupported) - .collect(); - Self::new(pushdowns) - } - - /// Create a new [`PredicateSupport`] with filterrs marked as supported if - /// `f` returns true and unsupported otherwise. - pub fn new_with_supported_check( - filters: Vec>, - check: impl Fn(&Arc) -> bool, - ) -> Self { - let pushdowns = filters - .into_iter() - .map(|f| { - if check(&f) { - PredicateSupport::Supported(f) - } else { - PredicateSupport::Unsupported(f) - } - }) - .collect(); - Self::new(pushdowns) + self.predicate } - /// Transform all filters to supported, returning a new [`PredicateSupports`] - /// with all filters as [`PredicateSupport::Supported`]. - /// This does not modify the original [`PredicateSupport`]. - pub fn make_supported(self) -> Self { - let pushdowns = self - .0 - .into_iter() - .map(|f| match f { - PredicateSupport::Supported(expr) => PredicateSupport::Supported(expr), - PredicateSupport::Unsupported(expr) => PredicateSupport::Supported(expr), - }) - .collect(); - Self::new(pushdowns) - } - - /// Transform all filters to unsupported, returning a new [`PredicateSupports`] - /// with all filters as [`PredicateSupport::Supported`]. - /// This does not modify the original [`PredicateSupport`]. - pub fn make_unsupported(self) -> Self { - let pushdowns = self - .0 - .into_iter() - .map(|f| match f { - PredicateSupport::Supported(expr) => PredicateSupport::Unsupported(expr), - u @ PredicateSupport::Unsupported(_) => u, - }) - .collect(); - Self::new(pushdowns) - } - - /// Collect unsupported filters into a Vec, without removing them from the original - /// [`PredicateSupport`]. 
- pub fn collect_unsupported(&self) -> Vec> { - self.0 - .iter() - .filter_map(|f| match f { - PredicateSupport::Unsupported(expr) => Some(Arc::clone(expr)), - PredicateSupport::Supported(_) => None, - }) - .collect() - } - - /// Collect supported filters into a Vec, without removing them from the original - /// [`PredicateSupport`]. - pub fn collect_supported(&self) -> Vec> { - self.0 - .iter() - .filter_map(|f| match f { - PredicateSupport::Supported(expr) => Some(Arc::clone(expr)), - PredicateSupport::Unsupported(_) => None, - }) - .collect() - } - - /// Collect all filters into a Vec, without removing them from the original - /// FilterPushdowns. - pub fn collect_all(self) -> Vec> { - self.0 - .into_iter() - .map(|f| match f { - PredicateSupport::Supported(expr) - | PredicateSupport::Unsupported(expr) => expr, - }) - .collect() + /// Create a new [`PushedDownPredicate`] with supported pushdown. + pub fn supported(predicate: Arc) -> Self { + Self { + discriminant: PushedDown::Yes, + predicate, + } } - pub fn into_inner(self) -> Vec { - self.0 + /// Create a new [`PushedDownPredicate`] with unsupported pushdown. + pub fn unsupported(predicate: Arc) -> Self { + Self { + discriminant: PushedDown::No, + predicate, + } } +} - /// Return an iterator over the inner `Vec`. - pub fn iter(&self) -> impl Iterator { - self.0.iter() - } +/// Discriminant for the result of pushing down a filter into a child node. +#[derive(Debug, Clone, Copy)] +pub enum PushedDown { + /// The predicate was successfully pushed down into the child node. + Yes, + /// The predicate could not be pushed down into the child node. + No, +} - /// Return the number of filters in the inner `Vec`. - pub fn len(&self) -> usize { - self.0.len() +impl PushedDown { + /// Logical AND operation: returns `Yes` only if both operands are `Yes`. 
+ pub fn and(self, other: PushedDown) -> PushedDown { + match (self, other) { + (PushedDown::Yes, PushedDown::Yes) => PushedDown::Yes, + _ => PushedDown::No, + } } - /// Check if the inner `Vec` is empty. - pub fn is_empty(&self) -> bool { - self.0.is_empty() + /// Logical OR operation: returns `Yes` if either operand is `Yes`. + pub fn or(self, other: PushedDown) -> PushedDown { + match (self, other) { + (PushedDown::Yes, _) | (_, PushedDown::Yes) => PushedDown::Yes, + (PushedDown::No, PushedDown::No) => PushedDown::No, + } } - /// Check if all filters are supported. - pub fn is_all_supported(&self) -> bool { - self.0 - .iter() - .all(|f| matches!(f, PredicateSupport::Supported(_))) + /// Wrap a [`PhysicalExpr`] with this pushdown result. + pub fn wrap_expression(self, expr: Arc) -> PushedDownPredicate { + PushedDownPredicate { + discriminant: self, + predicate: expr, + } } +} - /// Check if all filters are unsupported. - pub fn is_all_unsupported(&self) -> bool { - self.0 - .iter() - .all(|f| matches!(f, PredicateSupport::Unsupported(_))) - } +/// The result of pushing down a single parent filter into all children. +#[derive(Debug, Clone)] +pub struct ChildFilterPushdownResult { + pub filter: Arc, + pub child_results: Vec, } -impl IntoIterator for PredicateSupports { - type Item = PredicateSupport; - type IntoIter = IntoIter; +impl ChildFilterPushdownResult { + /// Combine all child results using OR logic. + /// Returns `Yes` if **any** child supports the filter. + /// Returns `No` if **all** children reject the filter or if there are no children. + pub fn any(&self) -> PushedDown { + if self.child_results.is_empty() { + // If there are no children, filters cannot be supported + PushedDown::No + } else { + self.child_results + .iter() + .fold(PushedDown::No, |acc, result| acc.or(*result)) + } + } - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() + /// Combine all child results using AND logic. 
+ /// Returns `Yes` if **all** children support the filter. + /// Returns `No` if **any** child rejects the filter or if there are no children. + pub fn all(&self) -> PushedDown { + if self.child_results.is_empty() { + // If there are no children, filters cannot be supported + PushedDown::No + } else { + self.child_results + .iter() + .fold(PushedDown::Yes, |acc, result| acc.and(*result)) + } } } /// The result of pushing down filters into a child node. +/// /// This is the result provided to nodes in [`ExecutionPlan::handle_child_pushdown_result`]. /// Nodes process this result and convert it into a [`FilterPushdownPropagation`] /// that is returned to their parent. @@ -248,61 +198,68 @@ impl IntoIterator for PredicateSupports { /// [`ExecutionPlan::handle_child_pushdown_result`]: crate::ExecutionPlan::handle_child_pushdown_result #[derive(Debug, Clone)] pub struct ChildPushdownResult { - /// The combined result of pushing down each parent filter into each child. - /// For example, given the fitlers `[a, b]` and children `[1, 2, 3]` the matrix of responses: - /// - // | filter | child 1 | child 2 | child 3 | result | - // |--------|-------------|-----------|-----------|-------------| - // | a | Supported | Supported | Supported | Supported | - // | b | Unsupported | Supported | Supported | Unsupported | - /// - /// That is: if any child marks a filter as unsupported or if the filter was not pushed - /// down into any child then the result is unsupported. - /// If at least one children and all children that received the filter mark it as supported - /// then the result is supported. - pub parent_filters: PredicateSupports, + /// The parent filters that were pushed down as received by the current node when [`ExecutionPlan::gather_filters_for_pushdown`](crate::ExecutionPlan::gather_filters_for_pushdown) was called. + /// Note that this may *not* be the same as the filters that were passed to the children as the current node may have modified them + /// (e.g.
by reassigning column indices) when it returned them from [`ExecutionPlan::gather_filters_for_pushdown`](crate::ExecutionPlan::gather_filters_for_pushdown) in a [`FilterDescription`]. + /// Attached to each filter is a [`PushedDown`] *per child* that indicates whether the filter was supported or unsupported by each child. + /// To get combined results see [`ChildFilterPushdownResult::any`] and [`ChildFilterPushdownResult::all`]. + pub parent_filters: Vec, /// The result of pushing down each filter this node provided into each of it's children. - /// This is not combined with the parent filters so that nodes can treat each child independently. - pub self_filters: Vec, + /// The outer vector corresponds to each child, and the inner vector corresponds to each filter. + /// Since this node may have generated a different filter for each child the inner vector may have different lengths or the expressions may not match at all. + /// It is up to each node to interpret this result based on the filters it provided for each child in [`ExecutionPlan::gather_filters_for_pushdown`](crate::ExecutionPlan::gather_filters_for_pushdown). + pub self_filters: Vec>, } -/// The result of pushing down filters into a node that it returns to its parent. -/// This is what nodes return from [`ExecutionPlan::handle_child_pushdown_result`] to communicate +/// The result of pushing down filters into a node. +/// +/// Returned from [`ExecutionPlan::handle_child_pushdown_result`] to communicate /// to the optimizer: /// -/// 1. What to do with any parent filters that were not completely handled by the children. +/// 1. What to do with any parent filters that could not be pushed down into the children. /// 2. If the node needs to be replaced in the execution plan with a new node or not.
/// /// [`ExecutionPlan::handle_child_pushdown_result`]: crate::ExecutionPlan::handle_child_pushdown_result #[derive(Debug, Clone)] pub struct FilterPushdownPropagation { - pub filters: PredicateSupports, + /// What filters were pushed into the parent node. + pub filters: Vec, + /// The updated node, if it was updated during pushdown pub updated_node: Option, } impl FilterPushdownPropagation { - /// Create a new [`FilterPushdownPropagation`] that tells the parent node - /// that echoes back up to the parent the result of pushing down the filters - /// into the children. - pub fn transparent(child_pushdown_result: ChildPushdownResult) -> Self { + /// Create a new [`FilterPushdownPropagation`] that tells the parent node that each parent filter + /// is supported if it was supported by *all* children. + pub fn if_all(child_pushdown_result: ChildPushdownResult) -> Self { + let filters = child_pushdown_result + .parent_filters + .into_iter() + .map(|result| result.all()) + .collect(); Self { - filters: child_pushdown_result.parent_filters, + filters, updated_node: None, } } - /// Create a new [`FilterPushdownPropagation`] that tells the parent node - /// that none of the parent filters were not pushed down. - pub fn unsupported(parent_filters: Vec>) -> Self { - let unsupported = PredicateSupports::all_unsupported(parent_filters); + /// Create a new [`FilterPushdownPropagation`] that tells the parent node that each parent filter + /// is supported if it was supported by *any* child. + pub fn if_any(child_pushdown_result: ChildPushdownResult) -> Self { + let filters = child_pushdown_result + .parent_filters + .into_iter() + .map(|result| result.any()) + .collect(); Self { - filters: unsupported, + filters, updated_node: None, } } /// Create a new [`FilterPushdownPropagation`] with the specified filter support. 
- pub fn with_filters(filters: PredicateSupports) -> Self { + /// This transmits up to our parent node what the result of pushing down the filters into our node and possibly our subtree was. + pub fn with_parent_pushdown_result(filters: Vec) -> Self { Self { filters, updated_node: None, @@ -310,34 +267,105 @@ impl FilterPushdownPropagation { } /// Bind an updated node to the [`FilterPushdownPropagation`]. + /// Use this when the current node wants to update iself in the tree or replace itself with a new node (e.g. one of it's children). + /// You do not need to call this if one of the children of the current node may have updated itself, that is handled by the optimizer. pub fn with_updated_node(mut self, updated_node: T) -> Self { self.updated_node = Some(updated_node); self } } +/// Describes filter pushdown for a single child node. +/// +/// This structure contains two types of filters: +/// - **Parent filters**: Filters received from the parent node, marked as supported or unsupported +/// - **Self filters**: Filters generated by the current node to be pushed down to this child #[derive(Debug, Clone)] -struct ChildFilterDescription { +pub struct ChildFilterDescription { /// Description of which parent filters can be pushed down into this node. /// Since we need to transmit filter pushdown results back to this node's parent /// we need to track each parent filter for each child, even those that are unsupported / won't be pushed down. - /// We do this using a [`PredicateSupport`] which simplifies manipulating supported/unsupported filters. - parent_filters: PredicateSupports, + pub(crate) parent_filters: Vec, /// Description of which filters this node is pushing down to its children. /// Since this is not transmitted back to the parents we can have variable sized inner arrays /// instead of having to track supported/unsupported. 
- self_filters: Vec>, + pub(crate) self_filters: Vec>, } impl ChildFilterDescription { - fn new() -> Self { - Self { - parent_filters: PredicateSupports::new(vec![]), - self_filters: vec![], + /// Build a child filter description by analyzing which parent filters can be pushed to a specific child. + /// + /// This method performs column analysis to determine which filters can be pushed down: + /// - If all columns referenced by a filter exist in the child's schema, it can be pushed down + /// - Otherwise, it cannot be pushed down to that child + /// + /// See [`FilterDescription::from_children`] for more details + pub fn from_child( + parent_filters: &[Arc], + child: &Arc, + ) -> Result { + let child_schema = child.schema(); + + // Get column names from child schema for quick lookup + let child_column_names: HashSet<&str> = child_schema + .fields() + .iter() + .map(|f| f.name().as_str()) + .collect(); + + // Analyze each parent filter + let mut child_parent_filters = Vec::with_capacity(parent_filters.len()); + + for filter in parent_filters { + // Check which columns the filter references + let referenced_columns = collect_columns(filter); + + // Check if all referenced columns exist in the child schema + let all_columns_exist = referenced_columns + .iter() + .all(|col| child_column_names.contains(col.name())); + + if all_columns_exist { + // All columns exist in child - we can push down + // Need to reassign column indices to match child schema + let reassigned_filter = + reassign_predicate_columns(Arc::clone(filter), &child_schema, false)?; + child_parent_filters + .push(PushedDownPredicate::supported(reassigned_filter)); + } else { + // Some columns don't exist in child - cannot push down + child_parent_filters + .push(PushedDownPredicate::unsupported(Arc::clone(filter))); + } } + + Ok(Self { + parent_filters: child_parent_filters, + self_filters: vec![], + }) + } + + /// Add a self filter (from the current node) to be pushed down to this child. 
+ pub fn with_self_filter(mut self, filter: Arc) -> Self { + self.self_filters.push(filter); + self + } + + /// Add multiple self filters. + pub fn with_self_filters(mut self, filters: Vec>) -> Self { + self.self_filters.extend(filters); + self } } +/// Describes how filters should be pushed down to children. +/// +/// This structure contains filter descriptions for each child node, specifying: +/// - Which parent filters can be pushed down to each child +/// - Which self-generated filters should be pushed down to each child +/// +/// The filter routing is determined by column analysis - filters can only be pushed +/// to children whose schemas contain all the referenced columns. #[derive(Debug, Clone)] pub struct FilterDescription { /// A filter description for each child. @@ -346,14 +374,46 @@ pub struct FilterDescription { child_filter_descriptions: Vec, } +impl Default for FilterDescription { + fn default() -> Self { + Self::new() + } +} + impl FilterDescription { - pub fn new_with_child_count(num_children: usize) -> Self { + /// Create a new empty FilterDescription + pub fn new() -> Self { Self { - child_filter_descriptions: vec![ChildFilterDescription::new(); num_children], + child_filter_descriptions: vec![], } } - pub fn parent_filters(&self) -> Vec { + /// Add a child filter description + pub fn with_child(mut self, child: ChildFilterDescription) -> Self { + self.child_filter_descriptions.push(child); + self + } + + /// Build a filter description by analyzing which parent filters can be pushed to each child. 
+ /// This method automatically determines filter routing based on column analysis: + /// - If all columns referenced by a filter exist in a child's schema, it can be pushed down + /// - Otherwise, it cannot be pushed down to that child + pub fn from_children( + parent_filters: Vec>, + children: &[&Arc], + ) -> Result { + let mut desc = Self::new(); + + // For each child, create a ChildFilterDescription + for child in children { + desc = desc + .with_child(ChildFilterDescription::from_child(&parent_filters, child)?); + } + + Ok(desc) + } + + pub fn parent_filters(&self) -> Vec> { self.child_filter_descriptions .iter() .map(|d| &d.parent_filters) @@ -368,70 +428,4 @@ impl FilterDescription { .cloned() .collect() } - - /// Mark all parent filters as supported for all children. - /// This is the case if the node allows filters to be pushed down through it - /// without any modification. - /// This broadcasts the parent filters to all children. - /// If handling of parent filters is different for each child then you should set the - /// field direclty. - /// For example, nodes like [`RepartitionExec`] that let filters pass through it transparently - /// use this to mark all parent filters as supported. - /// - /// [`RepartitionExec`]: crate::repartition::RepartitionExec - pub fn all_parent_filters_supported( - mut self, - parent_filters: Vec>, - ) -> Self { - let supported = PredicateSupports::all_supported(parent_filters); - for child in &mut self.child_filter_descriptions { - child.parent_filters = supported.clone(); - } - self - } - - /// Mark all parent filters as unsupported for all children. - /// This is the case if the node does not allow filters to be pushed down through it. - /// This broadcasts the parent filters to all children. - /// If handling of parent filters is different for each child then you should set the - /// field direclty. 
- /// For example, the default implementation of filter pushdwon in [`ExecutionPlan`] - /// assumes that filters cannot be pushed down to children. - /// - /// [`ExecutionPlan`]: crate::ExecutionPlan - pub fn all_parent_filters_unsupported( - mut self, - parent_filters: Vec>, - ) -> Self { - let unsupported = PredicateSupports::all_unsupported(parent_filters); - for child in &mut self.child_filter_descriptions { - child.parent_filters = unsupported.clone(); - } - self - } - - /// Add a filter generated / owned by the current node to be pushed down to all children. - /// This assumes that there is a single filter that that gets pushed down to all children - /// equally. - /// If there are multiple filters or pushdown to children is not homogeneous then - /// you should set the field directly. - /// For example: - /// - `TopK` uses this to push down a single filter to all children, it can use this method. - /// - `HashJoinExec` pushes down a filter only to the probe side, it cannot use this method. 
- pub fn with_self_filter(mut self, predicate: Arc) -> Self { - for child in &mut self.child_filter_descriptions { - child.self_filters = vec![Arc::clone(&predicate)]; - } - self - } - - pub fn with_self_filters_for_children( - mut self, - filters: Vec>>, - ) -> Self { - for (child, filters) in self.child_filter_descriptions.iter_mut().zip(filters) { - child.self_filters = filters; - } - self - } } diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index e4d554ceb62cf..a41e668ab4dab 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -559,7 +559,8 @@ impl CrossJoinStream { handle_state!(ready!(self.fetch_probe_batch(cx))) } CrossJoinStreamState::BuildBatches(_) => { - handle_state!(self.build_batches()) + let poll = handle_state!(self.build_batches()); + self.join_metrics.baseline.record_poll(poll) } }; } @@ -632,7 +633,6 @@ impl CrossJoinStream { } self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); return Ok(StatefulStreamResult::Ready(Some(batch))); } } @@ -647,7 +647,7 @@ impl CrossJoinStream { mod tests { use super::*; use crate::common; - use crate::test::build_table_scan_i32; + use crate::test::{assert_join_metrics, build_table_scan_i32}; use datafusion_common::{assert_contains, test_util::batches_to_sort_string}; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -657,14 +657,15 @@ mod tests { left: Arc, right: Arc, context: Arc, - ) -> Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec, MetricsSet)> { let join = CrossJoinExec::new(left, right); let columns_header = columns(&join.schema()); let stream = join.execute(0, context)?; let batches = common::collect(stream).await?; + let metrics = join.metrics().unwrap(); - Ok((columns_header, batches)) + Ok((columns_header, batches, metrics)) } #[tokio::test] @@ -831,7 +832,7 @@ mod tests { ("c2", &vec![14, 15]), ); - let (columns, 
batches) = join_collect(left, right, task_ctx).await?; + let (columns, batches, metrics) = join_collect(left, right, task_ctx).await?; assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]); @@ -848,6 +849,8 @@ mod tests { +----+----+----+----+----+----+ "#); + assert_join_metrics!(metrics, 6); + Ok(()) } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 770399290dca5..a7f28ede4408f 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -34,6 +34,7 @@ use super::{ }; use super::{JoinOn, JoinOnRef}; use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::joins::join_hash_map::{JoinHashMapU32, JoinHashMapU64}; use crate::projection::{ try_embed_projection, try_pushdown_through_join, EmbeddedProjection, JoinData, ProjectionExec, @@ -47,10 +48,10 @@ use crate::{ joins::join_hash_map::JoinHashMapOffset, joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, - build_batch_from_indices, build_join_schema, check_join_is_valid, - estimate_join_statistics, need_produce_result_in_final, + build_batch_empty_build_side, build_batch_from_indices, build_join_schema, + check_join_is_valid, estimate_join_statistics, need_produce_result_in_final, symmetric_join_output_partitioning, BuildProbeJoinMetrics, ColumnIndex, - JoinFilter, JoinHashMap, JoinHashMapType, StatefulStreamResult, + JoinFilter, JoinHashMapType, StatefulStreamResult, }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, @@ -69,8 +70,8 @@ use arrow::record_batch::RecordBatch; use arrow::util::bit_util; use datafusion_common::utils::memory::estimate_memory_size; use datafusion_common::{ - internal_datafusion_err, internal_err, plan_err, project_schema, DataFusionError, - JoinSide, JoinType, NullEquality, Result, + internal_datafusion_err, internal_err, plan_err, 
project_schema, JoinSide, JoinType, + NullEquality, Result, }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; @@ -93,7 +94,7 @@ const HASH_JOIN_SEED: RandomState = /// HashTable and input data for the left (build side) of a join struct JoinLeftData { /// The hash table with indices into `batch` - hash_map: JoinHashMap, + hash_map: Box, /// The input rows for the build side batch: RecordBatch, /// The build side on expressions values @@ -113,7 +114,7 @@ struct JoinLeftData { impl JoinLeftData { /// Create a new `JoinLeftData` from its parts fn new( - hash_map: JoinHashMap, + hash_map: Box, batch: RecordBatch, values: Vec, visited_indices_bitmap: SharedBitmapBuilder, @@ -131,8 +132,8 @@ impl JoinLeftData { } /// return a reference to the hash map - fn hash_map(&self) -> &JoinHashMap { - &self.hash_map + fn hash_map(&self) -> &dyn JoinHashMapType { + &*self.hash_map } /// returns a reference to the build side batch @@ -981,14 +982,25 @@ async fn collect_left_input( // Estimation of memory size, required for hashtable, prior to allocation. 
// Final result can be verified using `RawTable.allocation_info()` - let fixed_size = size_of::(); - let estimated_hashtable_size = - estimate_memory_size::<(u64, u64)>(num_rows, fixed_size)?; - - reservation.try_grow(estimated_hashtable_size)?; - metrics.build_mem_used.add(estimated_hashtable_size); + let fixed_size_u32 = size_of::(); + let fixed_size_u64 = size_of::(); + + // Use `u32` indices for the JoinHashMap when num_rows ≤ u32::MAX, otherwise use the + // `u64` indice variant + let mut hashmap: Box = if num_rows > u32::MAX as usize { + let estimated_hashtable_size = + estimate_memory_size::<(u64, u64)>(num_rows, fixed_size_u64)?; + reservation.try_grow(estimated_hashtable_size)?; + metrics.build_mem_used.add(estimated_hashtable_size); + Box::new(JoinHashMapU64::with_capacity(num_rows)) + } else { + let estimated_hashtable_size = + estimate_memory_size::<(u32, u64)>(num_rows, fixed_size_u32)?; + reservation.try_grow(estimated_hashtable_size)?; + metrics.build_mem_used.add(estimated_hashtable_size); + Box::new(JoinHashMapU32::with_capacity(num_rows)) + }; - let mut hashmap = JoinHashMap::with_capacity(num_rows); let mut hashes_buffer = Vec::new(); let mut offset = 0; @@ -1000,7 +1012,7 @@ async fn collect_left_input( update_hash( &on_left, batch, - &mut hashmap, + &mut *hashmap, offset, &random_state, &mut hashes_buffer, @@ -1052,19 +1064,16 @@ async fn collect_left_input( /// which allows to keep either first (if set to true) or last (if set to false) row index /// as a chain head for rows with equal hash values. 
#[allow(clippy::too_many_arguments)] -pub fn update_hash( +pub fn update_hash( on: &[PhysicalExprRef], batch: &RecordBatch, - hash_map: &mut T, + hash_map: &mut dyn JoinHashMapType, offset: usize, random_state: &RandomState, hashes_buffer: &mut Vec, deleted_offset: usize, fifo_hashmap: bool, -) -> Result<()> -where - T: JoinHashMapType, -{ +) -> Result<()> { // evaluate the keys let keys_values = on .iter() @@ -1084,9 +1093,9 @@ where .map(|(i, val)| (i + offset, val)); if fifo_hashmap { - hash_map.update_from_iter(hash_values_iter.rev(), deleted_offset); + hash_map.update_from_iter(Box::new(hash_values_iter.rev()), deleted_offset); } else { - hash_map.update_from_iter(hash_values_iter, deleted_offset); + hash_map.update_from_iter(Box::new(hash_values_iter), deleted_offset); } Ok(()) @@ -1298,7 +1307,7 @@ impl RecordBatchStream for HashJoinStream { /// ``` #[allow(clippy::too_many_arguments)] fn lookup_join_hashmap( - build_hashmap: &JoinHashMap, + build_hashmap: &dyn JoinHashMapType, build_side_values: &[ArrayRef], probe_side_values: &[ArrayRef], null_equality: NullEquality, @@ -1354,11 +1363,9 @@ pub fn equal_rows_arr( ) -> Result<(UInt64Array, UInt32Array)> { let mut iter = left_arrays.iter().zip(right_arrays.iter()); - let (first_left, first_right) = iter.next().ok_or_else(|| { - DataFusionError::Internal( - "At least one array should be provided for both left and right".to_string(), - ) - })?; + let Some((first_left, first_right)) = iter.next() else { + return Ok((Vec::::new().into(), Vec::::new().into())); + }; let arr_left = take(first_left.as_ref(), indices_left, None)?; let arr_right = take(first_right.as_ref(), indices_right, None)?; @@ -1403,10 +1410,12 @@ impl HashJoinStream { handle_state!(ready!(self.fetch_probe_batch(cx))) } HashJoinStreamState::ProcessProbeBatch(_) => { - handle_state!(self.process_probe_batch()) + let poll = handle_state!(self.process_probe_batch()); + self.join_metrics.baseline.record_poll(poll) } 
HashJoinStreamState::ExhaustedProbeSide => { - handle_state!(self.process_unmatched_build_batch()) + let poll = handle_state!(self.process_unmatched_build_batch()); + self.join_metrics.baseline.record_poll(poll) } HashJoinStreamState::Completed => Poll::Ready(None), }; @@ -1487,6 +1496,23 @@ impl HashJoinStream { let timer = self.join_metrics.join_time.timer(); + // if the left side is empty, we can skip the (potentially expensive) join operation + if build_side.left_data.hash_map.is_empty() && self.filter.is_none() { + let result = build_batch_empty_build_side( + &self.schema, + build_side.left_data.batch(), + &state.batch, + &self.column_indices, + self.join_type, + )?; + self.join_metrics.output_batches.add(1); + timer.done(); + + self.state = HashJoinStreamState::FetchProbeBatch; + + return Ok(StatefulStreamResult::Ready(Some(result))); + } + // get the matched by join keys indices let (left_indices, right_indices, next_offset) = lookup_join_hashmap( build_side.left_data.hash_map(), @@ -1507,6 +1533,7 @@ impl HashJoinStream { right_indices, filter, JoinSide::Left, + None, )? 
} else { (left_indices, right_indices) @@ -1582,7 +1609,6 @@ impl HashJoinStream { }; self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(result.num_rows()); timer.done(); if next_offset.is_none() { @@ -1639,7 +1665,6 @@ impl HashJoinStream { self.join_metrics.input_rows.add(batch.num_rows()); self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); } timer.done(); @@ -1670,7 +1695,7 @@ impl EmbeddedProjection for HashJoinExec { mod tests { use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; - use crate::test::TestMemoryExec; + use crate::test::{assert_join_metrics, TestMemoryExec}; use crate::{ common, expressions::Column, repartition::RepartitionExec, test::build_table_i32, test::exec::MockExec, @@ -1763,14 +1788,15 @@ mod tests { join_type: &JoinType, null_equality: NullEquality, context: Arc, - ) -> Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec, MetricsSet)> { let join = join(left, right, on, join_type, null_equality)?; let columns_header = columns(&join.schema()); let stream = join.execute(0, context)?; let batches = common::collect(stream).await?; + let metrics = join.metrics().unwrap(); - Ok((columns_header, batches)) + Ok((columns_header, batches, metrics)) } async fn partitioned_join_collect( @@ -1780,7 +1806,7 @@ mod tests { join_type: &JoinType, null_equality: NullEquality, context: Arc, - ) -> Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec, MetricsSet)> { join_collect_with_partition_mode( left, right, @@ -1801,7 +1827,7 @@ mod tests { partition_mode: PartitionMode, null_equality: NullEquality, context: Arc, - ) -> Result<(Vec, Vec)> { + ) -> Result<(Vec, Vec, MetricsSet)> { let partition_count = 4; let (left_expr, right_expr) = on @@ -1865,8 +1891,9 @@ mod tests { .collect::>(), ); } + let metrics = join.metrics().unwrap(); - Ok((columns, batches)) + Ok((columns, batches, metrics)) } #[apply(batch_sizes)] @@ -1889,7 +1916,7 @@ mod tests { 
Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -1914,6 +1941,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -1936,7 +1965,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = partitioned_join_collect( + let (columns, batches, metrics) = partitioned_join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -1960,6 +1989,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -1981,7 +2012,7 @@ mod tests { Arc::new(Column::new_with_schema("b2", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -2006,6 +2037,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -2027,7 +2060,7 @@ mod tests { Arc::new(Column::new_with_schema("b2", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -2053,6 +2086,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 4); + Ok(()) } @@ -2081,7 +2116,7 @@ mod tests { ), ]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -2122,6 +2157,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -2159,7 +2196,7 @@ mod tests { ), ]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -2200,6 +2237,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -2232,7 +2271,7 @@ mod tests { Arc::new(Column::new_with_schema("b2", &right.schema())?) 
as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -2258,6 +2297,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 4); + Ok(()) } @@ -2577,7 +2618,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -2586,6 +2627,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b1", "c2"]); allow_duplicates! { @@ -2600,6 +2642,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -2622,7 +2666,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = partitioned_join_collect( + let (columns, batches, metrics) = partitioned_join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -2631,6 +2675,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b1", "c2"]); allow_duplicates! { @@ -2645,6 +2690,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3267,7 +3314,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -3291,6 +3338,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3313,7 +3362,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = partitioned_join_collect( + let (columns, batches, metrics) = partitioned_join_collect( left, right, on, @@ -3337,6 +3386,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3408,7 +3459,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -3417,6 +3468,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); allow_duplicates! { @@ -3431,6 +3483,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3453,7 +3507,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = partitioned_join_collect( + let (columns, batches, metrics) = partitioned_join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -3462,6 +3516,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); allow_duplicates! { @@ -3476,6 +3531,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3498,7 +3555,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -3507,6 +3564,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a2", "b1", "c2", "mark"]); let expected = [ @@ -3520,6 +3578,8 @@ mod tests { ]; assert_batches_sorted_eq!(expected, &batches); + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -3542,7 +3602,7 @@ mod tests { Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, )]; - let (columns, batches) = partitioned_join_collect( + let (columns, batches, metrics) = partitioned_join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -3551,6 +3611,7 @@ mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a2", "b1", "c2", "mark"]); let expected = [ @@ -3565,11 +3626,13 @@ mod tests { ]; assert_batches_sorted_eq!(expected, &batches); + assert_join_metrics!(metrics, 4); + Ok(()) } #[test] - fn join_with_hash_collision() -> Result<()> { + fn join_with_hash_collisions_64() -> Result<()> { let mut hashmap_left = HashTable::with_capacity(4); let left = build_table_i32( ("a", &vec![10, 20]), @@ -3606,7 +3669,7 @@ mod tests { // Join key column for both join sides let key_column: PhysicalExprRef = Arc::new(Column::new("a", 0)) as _; - let join_hash_map = JoinHashMap::new(hashmap_left, next); + let join_hash_map = JoinHashMapU64::new(hashmap_left, next); let left_keys_values = key_column.evaluate(&left)?.into_array(left.num_rows())?; let right_keys_values = @@ -3639,6 +3702,70 @@ mod tests { Ok(()) } + #[test] + fn join_with_hash_collisions_u32() -> Result<()> { + let mut hashmap_left = HashTable::with_capacity(4); + let left = build_table_i32( + ("a", &vec![10, 20]), + ("x", &vec![100, 200]), + ("y", &vec![200, 300]), + ); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; left.num_rows()]; + let hashes = create_hashes( + &[Arc::clone(&left.columns()[0])], + &random_state, + hashes_buff, + )?; + + hashmap_left.insert_unique(hashes[0], (hashes[0], 1u32), |(h, _)| *h); + hashmap_left.insert_unique(hashes[0], (hashes[0], 2u32), |(h, _)| *h); + hashmap_left.insert_unique(hashes[1], (hashes[1], 1u32), |(h, _)| *h); + hashmap_left.insert_unique(hashes[1], (hashes[1], 2u32), |(h, _)| *h); + + let next: Vec = vec![2, 0]; + + let right = build_table_i32( + ("a", &vec![10, 20]), + ("b", &vec![0, 0]), + ("c", &vec![30, 40]), + ); + + let key_column: PhysicalExprRef = 
Arc::new(Column::new("a", 0)) as _; + + let join_hash_map = JoinHashMapU32::new(hashmap_left, next); + + let left_keys_values = key_column.evaluate(&left)?.into_array(left.num_rows())?; + let right_keys_values = + key_column.evaluate(&right)?.into_array(right.num_rows())?; + let mut hashes_buffer = vec![0; right.num_rows()]; + create_hashes( + &[Arc::clone(&right_keys_values)], + &random_state, + &mut hashes_buffer, + )?; + + let (l, r, _) = lookup_join_hashmap( + &join_hash_map, + &[left_keys_values], + &[right_keys_values], + NullEquality::NullEqualsNothing, + &hashes_buffer, + 8192, + (0, None), + )?; + + // We still expect to match rows 0 and 1 on both sides + let left_ids: UInt64Array = vec![0, 1].into(); + let right_ids: UInt32Array = vec![0, 1].into(); + + assert_eq!(left_ids, l); + assert_eq!(right_ids, r); + + Ok(()) + } + #[tokio::test] async fn join_with_duplicated_column_names() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); @@ -4054,7 +4181,7 @@ mod tests { ]; for (join_type, expected) in test_cases { - let (_, batches) = join_collect_with_partition_mode( + let (_, batches, metrics) = join_collect_with_partition_mode( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -4065,6 +4192,7 @@ mod tests { ) .await?; assert_batches_sorted_eq!(expected, &batches); + assert_join_metrics!(metrics, expected.len() - 4); } Ok(()) @@ -4492,7 +4620,7 @@ mod tests { Arc::new(Column::new_with_schema("n2", &right.schema())?) as _, )]; - let (columns, batches) = join_collect( + let (columns, batches, metrics) = join_collect( left, right, on, @@ -4516,6 +4644,8 @@ mod tests { "#); } + assert_join_metrics!(metrics, 3); + Ok(()) } @@ -4531,7 +4661,7 @@ mod tests { Arc::new(Column::new_with_schema("n2", &right.schema())?) 
as _, )]; - let (_, batches_null_eq) = join_collect( + let (_, batches_null_eq, metrics) = join_collect( Arc::clone(&left), Arc::clone(&right), on.clone(), @@ -4551,7 +4681,9 @@ mod tests { "#); } - let (_, batches_null_neq) = join_collect( + assert_join_metrics!(metrics, 1); + + let (_, batches_null_neq, metrics) = join_collect( left, right, on, @@ -4561,6 +4693,8 @@ mod tests { ) .await?; + assert_join_metrics!(metrics, 0); + let expected_null_neq = ["+----+----+", "| n1 | n2 |", "+----+----+", "+----+----+"]; assert_batches_eq!(expected_null_neq, &batches_null_neq); diff --git a/datafusion/physical-plan/src/joins/join_hash_map.rs b/datafusion/physical-plan/src/joins/join_hash_map.rs index 521e19d7bf444..bdd4bfeeb0fbe 100644 --- a/datafusion/physical-plan/src/joins/join_hash_map.rs +++ b/datafusion/physical-plan/src/joins/join_hash_map.rs @@ -20,7 +20,7 @@ //! ["on" values] to a list of indices with this key's value. use std::fmt::{self, Debug}; -use std::ops::IndexMut; +use std::ops::Sub; use hashbrown::hash_table::Entry::{Occupied, Vacant}; use hashbrown::HashTable; @@ -35,7 +35,7 @@ use hashbrown::HashTable; /// During this stage it might be the case that a row is contained the same hashmap value, /// but the values don't match. Those are checked in the `equal_rows_arr` method. /// -/// The indices (values) are stored in a separate chained list stored in the `Vec`. +/// The indices (values) are stored in a separate chained list stored as `Vec` or `Vec`. /// /// The first value (+1) is stored in the hashmap, whereas the next value is stored in array at the position value. /// @@ -87,27 +87,170 @@ use hashbrown::HashTable; /// | 0 | 0 | 0 | 2 | 4 | <--- hash value 10 maps to 5,4,2 (which means indices values 4,3,1) /// --------------------- /// ``` -pub struct JoinHashMap { +/// +/// Here we have an option between creating a `JoinHashMapType` using `u32` or `u64` indices +/// based on how many rows were being used for indices. 
+/// +/// At runtime we choose between using `JoinHashMapU32` and `JoinHashMapU64` which oth implement +/// `JoinHashMapType`. +pub trait JoinHashMapType: Send + Sync { + fn extend_zero(&mut self, len: usize); + + fn update_from_iter<'a>( + &mut self, + iter: Box + Send + 'a>, + deleted_offset: usize, + ); + + fn get_matched_indices<'a>( + &self, + iter: Box + 'a>, + deleted_offset: Option, + ) -> (Vec, Vec); + + fn get_matched_indices_with_limit_offset( + &self, + hash_values: &[u64], + limit: usize, + offset: JoinHashMapOffset, + ) -> (Vec, Vec, Option); + + /// Returns `true` if the join hash map contains no entries. + fn is_empty(&self) -> bool; +} + +pub struct JoinHashMapU32 { + // Stores hash value to last row index + map: HashTable<(u64, u32)>, + // Stores indices in chained list data structure + next: Vec, +} + +impl JoinHashMapU32 { + #[cfg(test)] + pub(crate) fn new(map: HashTable<(u64, u32)>, next: Vec) -> Self { + Self { map, next } + } + + pub fn with_capacity(cap: usize) -> Self { + Self { + map: HashTable::with_capacity(cap), + next: vec![0; cap], + } + } +} + +impl Debug for JoinHashMapU32 { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +impl JoinHashMapType for JoinHashMapU32 { + fn extend_zero(&mut self, _: usize) {} + + fn update_from_iter<'a>( + &mut self, + iter: Box + Send + 'a>, + deleted_offset: usize, + ) { + update_from_iter::(&mut self.map, &mut self.next, iter, deleted_offset); + } + + fn get_matched_indices<'a>( + &self, + iter: Box + 'a>, + deleted_offset: Option, + ) -> (Vec, Vec) { + get_matched_indices::(&self.map, &self.next, iter, deleted_offset) + } + + fn get_matched_indices_with_limit_offset( + &self, + hash_values: &[u64], + limit: usize, + offset: JoinHashMapOffset, + ) -> (Vec, Vec, Option) { + get_matched_indices_with_limit_offset::( + &self.map, + &self.next, + hash_values, + limit, + offset, + ) + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } +} + +pub struct JoinHashMapU64 { 
// Stores hash value to last row index map: HashTable<(u64, u64)>, // Stores indices in chained list data structure next: Vec, } -impl JoinHashMap { +impl JoinHashMapU64 { #[cfg(test)] pub(crate) fn new(map: HashTable<(u64, u64)>, next: Vec) -> Self { Self { map, next } } - pub(crate) fn with_capacity(capacity: usize) -> Self { - JoinHashMap { - map: HashTable::with_capacity(capacity), - next: vec![0; capacity], + pub fn with_capacity(cap: usize) -> Self { + Self { + map: HashTable::with_capacity(cap), + next: vec![0; cap], } } } +impl Debug for JoinHashMapU64 { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +impl JoinHashMapType for JoinHashMapU64 { + fn extend_zero(&mut self, _: usize) {} + + fn update_from_iter<'a>( + &mut self, + iter: Box + Send + 'a>, + deleted_offset: usize, + ) { + update_from_iter::(&mut self.map, &mut self.next, iter, deleted_offset); + } + + fn get_matched_indices<'a>( + &self, + iter: Box + 'a>, + deleted_offset: Option, + ) -> (Vec, Vec) { + get_matched_indices::(&self.map, &self.next, iter, deleted_offset) + } + + fn get_matched_indices_with_limit_offset( + &self, + hash_values: &[u64], + limit: usize, + offset: JoinHashMapOffset, + ) -> (Vec, Vec, Option) { + get_matched_indices_with_limit_offset::( + &self.map, + &self.next, + hash_values, + limit, + offset, + ) + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } +} + // Type of offsets for obtaining indices from JoinHashMap. pub(crate) type JoinHashMapOffset = (usize, Option); @@ -115,250 +258,198 @@ pub(crate) type JoinHashMapOffset = (usize, Option); // Early returns in case of reaching output tuples limit. macro_rules! 
chain_traverse { ( - $input_indices:ident, $match_indices:ident, $hash_values:ident, $next_chain:ident, - $input_idx:ident, $chain_idx:ident, $remaining_output:ident - ) => { - let mut match_row_idx = $chain_idx - 1; + $input_indices:ident, $match_indices:ident, + $hash_values:ident, $next_chain:ident, + $input_idx:ident, $chain_idx:ident, $remaining_output:ident, $one:ident, $zero:ident + ) => {{ + // now `one` and `zero` are in scope from the outer function + let mut match_row_idx = $chain_idx - $one; loop { - $match_indices.push(match_row_idx); + $match_indices.push(match_row_idx.into()); $input_indices.push($input_idx as u32); $remaining_output -= 1; - // Follow the chain to get the next index value - let next = $next_chain[match_row_idx as usize]; + + let next = $next_chain[match_row_idx.into() as usize]; if $remaining_output == 0 { - // In case current input index is the last, and no more chain values left - // returning None as whole input has been scanned - let next_offset = if $input_idx == $hash_values.len() - 1 && next == 0 { + // we compare against `zero` (of type T) here too + let next_offset = if $input_idx == $hash_values.len() - 1 && next == $zero + { None } else { - Some(($input_idx, Some(next))) + Some(($input_idx, Some(next.into()))) }; return ($input_indices, $match_indices, next_offset); } - if next == 0 { - // end of list + if next == $zero { break; } - match_row_idx = next - 1; + match_row_idx = next - $one; } - }; + }}; } -// Trait defining methods that must be implemented by a hash map type to be used for joins. -pub trait JoinHashMapType { - /// The type of list used to store the next list - type NextType: IndexMut; - /// Extend with zero - fn extend_zero(&mut self, len: usize); - /// Returns mutable references to the hash map and the next. - fn get_mut(&mut self) -> (&mut HashTable<(u64, u64)>, &mut Self::NextType); - /// Returns a reference to the hash map. 
- fn get_map(&self) -> &HashTable<(u64, u64)>; - /// Returns a reference to the next. - fn get_list(&self) -> &Self::NextType; - - // Whether values in the hashmap are distinct (no duplicate keys) - fn is_distinct(&self) -> bool { - false - } - - /// Updates hashmap from iterator of row indices & row hashes pairs. - fn update_from_iter<'a>( - &mut self, - iter: impl Iterator, - deleted_offset: usize, - ) { - let (mut_map, mut_list) = self.get_mut(); - for (row, &hash_value) in iter { - let entry = mut_map.entry( - hash_value, - |&(hash, _)| hash_value == hash, - |&(hash, _)| hash, - ); +pub fn update_from_iter<'a, T>( + map: &mut HashTable<(u64, T)>, + next: &mut [T], + iter: Box + Send + 'a>, + deleted_offset: usize, +) where + T: Copy + TryFrom + PartialOrd, + >::Error: Debug, +{ + for (row, &hash_value) in iter { + let entry = map.entry( + hash_value, + |&(hash, _)| hash_value == hash, + |&(hash, _)| hash, + ); - match entry { - Occupied(mut occupied_entry) => { - // Already exists: add index to next array - let (_, index) = occupied_entry.get_mut(); - let prev_index = *index; - // Store new value inside hashmap - *index = (row + 1) as u64; - // Update chained Vec at `row` with previous value - mut_list[row - deleted_offset] = prev_index; - } - Vacant(vacant_entry) => { - vacant_entry.insert((hash_value, (row + 1) as u64)); - // chained list at `row` is already initialized with 0 - // meaning end of list - } + match entry { + Occupied(mut occupied_entry) => { + // Already exists: add index to next array + let (_, index) = occupied_entry.get_mut(); + let prev_index = *index; + // Store new value inside hashmap + *index = T::try_from(row + 1).unwrap(); + // Update chained Vec at `row` with previous value + next[row - deleted_offset] = prev_index; + } + Vacant(vacant_entry) => { + vacant_entry.insert((hash_value, T::try_from(row + 1).unwrap())); } } } +} - /// Returns all pairs of row indices matched by hash. 
- /// - /// This method only compares hashes, so additional further check for actual values - /// equality may be required. - fn get_matched_indices<'a>( - &self, - iter: impl Iterator, - deleted_offset: Option, - ) -> (Vec, Vec) { - let mut input_indices = vec![]; - let mut match_indices = vec![]; - - let hash_map = self.get_map(); - let next_chain = self.get_list(); - for (row_idx, hash_value) in iter { - // Get the hash and find it in the index - if let Some((_, index)) = - hash_map.find(*hash_value, |(hash, _)| *hash_value == *hash) - { - let mut i = *index - 1; - loop { - let match_row_idx = if let Some(offset) = deleted_offset { - // This arguments means that we prune the next index way before here. - if i < offset as u64 { - // End of the list due to pruning - break; - } - i - offset as u64 - } else { - i - }; - match_indices.push(match_row_idx); - input_indices.push(row_idx as u32); - // Follow the chain to get the next index value - let next = next_chain[match_row_idx as usize]; - if next == 0 { - // end of list +pub fn get_matched_indices<'a, T>( + map: &HashTable<(u64, T)>, + next: &[T], + iter: Box + 'a>, + deleted_offset: Option, +) -> (Vec, Vec) +where + T: Copy + TryFrom + PartialOrd + Into + Sub, + >::Error: Debug, +{ + let mut input_indices = vec![]; + let mut match_indices = vec![]; + let zero = T::try_from(0).unwrap(); + let one = T::try_from(1).unwrap(); + + for (row_idx, hash_value) in iter { + // Get the hash and find it in the index + if let Some((_, index)) = map.find(*hash_value, |(hash, _)| *hash_value == *hash) + { + let mut i = *index - one; + loop { + let match_row_idx = if let Some(offset) = deleted_offset { + let offset = T::try_from(offset).unwrap(); + // This arguments means that we prune the next index way before here. 
+ if i < offset { + // End of the list due to pruning break; } - i = next - 1; + i - offset + } else { + i + }; + match_indices.push(match_row_idx.into()); + input_indices.push(row_idx as u32); + // Follow the chain to get the next index value + let next_chain = next[match_row_idx.into() as usize]; + if next_chain == zero { + // end of list + break; } + i = next_chain - one; } } - - (input_indices, match_indices) } - /// Matches hashes with taking limit and offset into account. - /// Returns pairs of matched indices along with the starting point for next - /// matching iteration (`None` if limit has not been reached). - /// - /// This method only compares hashes, so additional further check for actual values - /// equality may be required. - fn get_matched_indices_with_limit_offset( - &self, - hash_values: &[u64], - limit: usize, - offset: JoinHashMapOffset, - ) -> (Vec, Vec, Option) { - let mut input_indices = Vec::with_capacity(limit); - let mut match_indices = Vec::with_capacity(limit); - - let hash_map: &HashTable<(u64, u64)> = self.get_map(); - let next_chain = self.get_list(); - // Check if hashmap consists of unique values - // If so, we can skip the chain traversal - if self.is_distinct() { - let start = offset.0; - let end = (start + limit).min(hash_values.len()); - for (row_idx, &hash_value) in hash_values[start..end].iter().enumerate() { - if let Some((_, index)) = - hash_map.find(hash_value, |(hash, _)| hash_value == *hash) - { - input_indices.push(start as u32 + row_idx as u32); - match_indices.push(*index - 1); - } - } - if end == hash_values.len() { - // No more values to process - return (input_indices, match_indices, None); - } - return (input_indices, match_indices, Some((end, None))); - } + (input_indices, match_indices) +} - let mut remaining_output = limit; - - // Calculate initial `hash_values` index before iterating - let to_skip = match offset { - // None `initial_next_idx` indicates that `initial_idx` processing has'n been started - 
(initial_idx, None) => initial_idx, - // Zero `initial_next_idx` indicates that `initial_idx` has been processed during - // previous iteration, and it should be skipped - (initial_idx, Some(0)) => initial_idx + 1, - // Otherwise, process remaining `initial_idx` matches by traversing `next_chain`, - // to start with the next index - (initial_idx, Some(initial_next_idx)) => { - chain_traverse!( - input_indices, - match_indices, - hash_values, - next_chain, - initial_idx, - initial_next_idx, - remaining_output - ); - - initial_idx + 1 - } - }; +pub fn get_matched_indices_with_limit_offset( + map: &HashTable<(u64, T)>, + next_chain: &[T], + hash_values: &[u64], + limit: usize, + offset: JoinHashMapOffset, +) -> (Vec, Vec, Option) +where + T: Copy + TryFrom + PartialOrd + Into + Sub, + >::Error: Debug, +{ + let mut input_indices = Vec::with_capacity(limit); + let mut match_indices = Vec::with_capacity(limit); + let zero = T::try_from(0).unwrap(); + let one = T::try_from(1).unwrap(); - let mut row_idx = to_skip; - - for hash_value in &hash_values[to_skip..] { - if let Some((_, index)) = - hash_map.find(*hash_value, |(hash, _)| *hash_value == *hash) - { - chain_traverse!( - input_indices, - match_indices, - hash_values, - next_chain, - row_idx, - index, - remaining_output - ); + // Check if hashmap consists of unique values + // If so, we can skip the chain traversal + if map.len() == next_chain.len() { + let start = offset.0; + let end = (start + limit).min(hash_values.len()); + for (i, &hash) in hash_values[start..end].iter().enumerate() { + if let Some((_, idx)) = map.find(hash, |(h, _)| hash == *h) { + input_indices.push(start as u32 + i as u32); + match_indices.push((*idx - one).into()); } - row_idx += 1; } - - (input_indices, match_indices, None) - } -} - -/// Implementation of `JoinHashMapType` for `JoinHashMap`. 
-impl JoinHashMapType for JoinHashMap { - type NextType = Vec; - - // Void implementation - fn extend_zero(&mut self, _: usize) {} - - /// Get mutable references to the hash map and the next. - fn get_mut(&mut self) -> (&mut HashTable<(u64, u64)>, &mut Self::NextType) { - (&mut self.map, &mut self.next) - } - - /// Get a reference to the hash map. - fn get_map(&self) -> &HashTable<(u64, u64)> { - &self.map + let next_off = if end == hash_values.len() { + None + } else { + Some((end, None)) + }; + return (input_indices, match_indices, next_off); } - /// Get a reference to the next. - fn get_list(&self) -> &Self::NextType { - &self.next - } + let mut remaining_output = limit; - /// Check if the values in the hashmap are distinct. - fn is_distinct(&self) -> bool { - self.map.len() == self.next.len() - } -} + // Calculate initial `hash_values` index before iterating + let to_skip = match offset { + // None `initial_next_idx` indicates that `initial_idx` processing hasn't been started + (idx, None) => idx, + // Zero `initial_next_idx` indicates that `initial_idx` has been processed during + // previous iteration, and it should be skipped + (idx, Some(0)) => idx + 1, + // Otherwise, process remaining `initial_idx` matches by traversing `next_chain`, + // to start with the next index + (idx, Some(next_idx)) => { + let next_idx: T = T::try_from(next_idx as usize).unwrap(); + chain_traverse!( + input_indices, + match_indices, + hash_values, + next_chain, + idx, + next_idx, + remaining_output, + one, + zero + ); + idx + 1 + } + }; -impl Debug for JoinHashMap { - fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { - Ok(()) + let mut row_idx = to_skip; + for &hash in &hash_values[to_skip..]
{ + if let Some((_, idx)) = map.find(hash, |(h, _)| hash == *h) { + let idx: T = *idx; + chain_traverse!( + input_indices, + match_indices, + hash_values, + next_chain, + row_idx, + idx, + remaining_output, + one, + zero + ); + } + row_idx += 1; } + (input_indices, match_indices, None) } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index fcc1107a0e26c..5bb1673d4af26 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -18,6 +18,7 @@ //! [`NestedLoopJoinExec`]: joins without equijoin (equality predicates). use std::any::Any; +use std::cmp::min; use std::fmt::Formatter; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -26,7 +27,7 @@ use std::task::Poll; use super::utils::{ asymmetric_join_output_partitioning, get_final_indices_from_shared_bitmap, need_produce_result_in_final, reorder_output_after_swap, swap_join_projection, - BatchSplitter, BatchTransformer, NoopBatchTransformer, StatefulStreamResult, + StatefulStreamResult, }; use crate::common::can_project; use crate::execution_plan::{boundedness_from_children, EmissionType}; @@ -47,12 +48,13 @@ use crate::{ SendableRecordBatchStream, }; -use arrow::array::{BooleanBufferBuilder, UInt32Array, UInt64Array}; +use arrow::array::{BooleanBufferBuilder, PrimitiveArray, UInt32Array, UInt64Array}; use arrow::compute::concat_batches; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::{Schema, SchemaRef, UInt32Type, UInt64Type}; use arrow::record_batch::RecordBatch; use datafusion_common::{ - exec_datafusion_err, internal_err, project_schema, JoinSide, Result, Statistics, + exec_datafusion_err, internal_datafusion_err, internal_err, project_schema, JoinSide, + Result, Statistics, }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; @@ -510,8 +512,6 @@ impl 
ExecutionPlan for NestedLoopJoinExec { })?; let batch_size = context.session_config().batch_size(); - let enforce_batch_size_in_joins = - context.session_config().enforce_batch_size_in_joins(); let outer_table = self.right.execute(partition, context)?; @@ -530,37 +530,21 @@ impl ExecutionPlan for NestedLoopJoinExec { None => self.column_indices.clone(), }; - if enforce_batch_size_in_joins { - Ok(Box::pin(NestedLoopJoinStream { - schema: self.schema(), - filter: self.filter.clone(), - join_type: self.join_type, - outer_table, - inner_table, - column_indices: column_indices_after_projection, - join_metrics, - indices_cache, - right_side_ordered, - state: NestedLoopJoinStreamState::WaitBuildSide, - batch_transformer: BatchSplitter::new(batch_size), - left_data: None, - })) - } else { - Ok(Box::pin(NestedLoopJoinStream { - schema: self.schema(), - filter: self.filter.clone(), - join_type: self.join_type, - outer_table, - inner_table, - column_indices: column_indices_after_projection, - join_metrics, - indices_cache, - right_side_ordered, - state: NestedLoopJoinStreamState::WaitBuildSide, - batch_transformer: NoopBatchTransformer::new(), - left_data: None, - })) - } + Ok(Box::pin(NestedLoopJoinStream { + schema: self.schema(), + filter: self.filter.clone(), + join_type: self.join_type, + outer_table, + inner_table, + column_indices: column_indices_after_projection, + join_metrics, + indices_cache, + right_side_ordered, + state: NestedLoopJoinStreamState::WaitBuildSide, + left_data: None, + join_result_status: None, + intermediate_batch_size: batch_size, + })) } fn metrics(&self) -> Option { @@ -687,8 +671,15 @@ enum NestedLoopJoinStreamState { /// Indicates that a non-empty batch has been fetched from probe-side, and /// is ready to be processed ProcessProbeBatch(RecordBatch), - /// Indicates that probe-side has been fully processed - ExhaustedProbeSide, + /// Preparation phase: Gathers the indices of unmatched rows from the build-side. 
+ /// This state is entered for join types that emit unmatched build-side rows + /// (e.g., LEFT and FULL joins) after the entire probe-side input has been consumed. + PrepareUnmatchedBuildRows, + /// Output unmatched build-side rows. + /// The indices for rows to output have already been calculated in the previous + /// `PrepareUnmatchedBuildRows` state. In this state the final batch will be materialized incrementally. + // The inner `RecordBatch` is an empty dummy batch used to get the right schema. + OutputUnmatchedBuildRows(RecordBatch), /// Indicates that NestedLoopJoinStream execution is completed Completed, } @@ -705,8 +696,29 @@ impl NestedLoopJoinStreamState { } } +/// Tracks incremental output of join result batches. +/// +/// Initialized with all matching pairs that satisfy the join predicate. +/// Pairs are stored as indices in `build_indices` and `probe_indices`. +/// Each poll outputs a batch within the configured size limit and updates +/// processed_count until all pairs are consumed. +/// +/// Example: 5000 matches, batch size limit is 100 +/// - Poll 1: output batch[0..100], processed_count = 100 +/// - Poll 2: output batch[100..200], processed_count = 200 +/// - ...continues until processed_count = 5000 +struct JoinResultProgress { + /// Row indices from build-side table (left table). + build_indices: PrimitiveArray, + /// Row indices from probe-side table (right table). + probe_indices: PrimitiveArray, + /// Number of index pairs already processed into output batches. + /// We have completed join result for indices [0..processed_count). + processed_count: usize, +} + /// A stream that issues [RecordBatch]es as they arrive from the right of the join. -struct NestedLoopJoinStream { +struct NestedLoopJoinStream { /// Input schema schema: Arc, /// join filter @@ -729,10 +741,13 @@ struct NestedLoopJoinStream { right_side_ordered: bool, /// Current state of the stream state: NestedLoopJoinStreamState, - /// Transforms the output batch before returning.
- batch_transformer: T, /// Result of the left data future left_data: Option>, + + /// Tracks progress when building join result batches incrementally. + join_result_status: Option, + + intermediate_batch_size: usize, } /// Creates a Cartesian product of two input batches, preserving the order of the right batch, @@ -755,6 +770,7 @@ fn build_join_indices( right_batch: &RecordBatch, filter: Option<&JoinFilter>, indices_cache: &mut (UInt64Array, UInt32Array), + max_intermediate_batch_size: usize, ) -> Result<(UInt64Array, UInt32Array)> { let left_row_count = left_batch.num_rows(); let right_row_count = right_batch.num_rows(); @@ -805,13 +821,14 @@ fn build_join_indices( right_indices, filter, JoinSide::Left, + Some(max_intermediate_batch_size), ) } else { Ok((left_indices, right_indices)) } } -impl NestedLoopJoinStream { +impl NestedLoopJoinStream { fn poll_next_impl( &mut self, cx: &mut std::task::Context<'_>, @@ -825,16 +842,131 @@ impl NestedLoopJoinStream { handle_state!(ready!(self.fetch_probe_batch(cx))) } NestedLoopJoinStreamState::ProcessProbeBatch(_) => { - handle_state!(self.process_probe_batch()) + let poll = handle_state!(self.process_probe_batch()); + self.join_metrics.baseline.record_poll(poll) } - NestedLoopJoinStreamState::ExhaustedProbeSide => { - handle_state!(self.process_unmatched_build_batch()) + NestedLoopJoinStreamState::PrepareUnmatchedBuildRows => { + handle_state!(self.prepare_unmatched_output_indices()) + } + NestedLoopJoinStreamState::OutputUnmatchedBuildRows(_) => { + let poll = handle_state!(self.build_unmatched_output()); + self.join_metrics.baseline.record_poll(poll) } NestedLoopJoinStreamState::Completed => Poll::Ready(None), }; } } + // This function's main job is to construct an output `RecordBatch` based on pre-calculated join indices. + // It operates in a chunk-based manner, meaning it processes a portion of the results in each call, + // making it suitable for streaming large datasets without high memory consumption. 
+ // This function behaves like an iterator. It returns `Ok(None)` + // to signal that the result stream is exhausted and there is no more data. + fn get_next_join_result(&mut self) -> Result> { + let status = self.join_result_status.as_mut().ok_or_else(|| { + internal_datafusion_err!( + "get_next_join_result called without initializing join_result_status" + ) + })?; + + let (left_indices, right_indices, current_start) = ( + &status.build_indices, + &status.probe_indices, + status.processed_count, + ); + + let left_batch = self + .left_data + .as_ref() + .ok_or_else(|| internal_datafusion_err!("should have left_batch"))? + .batch(); + + let right_batch = match &self.state { + NestedLoopJoinStreamState::ProcessProbeBatch(record_batch) + | NestedLoopJoinStreamState::OutputUnmatchedBuildRows(record_batch) => { + record_batch + } + _ => { + return internal_err!( + "State should be ProcessProbeBatch or OutputUnmatchedBuildRows" + ) + } + }; + + if left_indices.is_empty() && right_indices.is_empty() && current_start == 0 { + // To match the behavior of the previous implementation, return an empty RecordBatch. 
+ let res = RecordBatch::new_empty(Arc::clone(&self.schema)); + status.processed_count = 1; + return Ok(Some(res)); + } + + if matches!(self.join_type, JoinType::RightSemi | JoinType::RightAnti) { + // in this case left_indices.num_rows() == 0 + let end = min( + current_start + self.intermediate_batch_size, + right_indices.len(), + ); + + if current_start >= end { + return Ok(None); + } + + let res = Some(build_batch_from_indices( + &self.schema, + left_batch, + right_batch, + left_indices, + &right_indices.slice(current_start, end - current_start), + &self.column_indices, + JoinSide::Left, + )?); + + status.processed_count = end; + return Ok(res); + } + + if current_start >= left_indices.len() { + return Ok(None); + } + + let end = min( + current_start + self.intermediate_batch_size, + left_indices.len(), + ); + + let left_indices = &left_indices.slice(current_start, end - current_start); + let right_indices = &right_indices.slice(current_start, end - current_start); + + // Switch around the build side and probe side for `JoinType::RightMark` + // because in a RightMark join, we want to mark rows on the right table + // by looking for matches in the left. 
+ let res = if self.join_type == JoinType::RightMark { + build_batch_from_indices( + &self.schema, + right_batch, + left_batch, + left_indices, + right_indices, + &self.column_indices, + JoinSide::Right, + ) + } else { + build_batch_from_indices( + &self.schema, + left_batch, + right_batch, + left_indices, + right_indices, + &self.column_indices, + JoinSide::Left, + ) + }?; + + status.processed_count = end; + + Ok(Some(res)) + } + fn collect_build_side( &mut self, cx: &mut std::task::Context<'_>, @@ -859,9 +991,12 @@ impl NestedLoopJoinStream { ) -> Poll>>> { match ready!(self.outer_table.poll_next_unpin(cx)) { None => { - self.state = NestedLoopJoinStreamState::ExhaustedProbeSide; + self.state = NestedLoopJoinStreamState::PrepareUnmatchedBuildRows; } Some(Ok(right_batch)) => { + self.join_metrics.input_batches.add(1); + self.join_metrics.input_rows.add(right_batch.num_rows()); + self.state = NestedLoopJoinStreamState::ProcessProbeBatch(right_batch); } Some(Err(err)) => return Poll::Ready(Err(err)), @@ -883,44 +1018,64 @@ impl NestedLoopJoinStream { let visited_left_side = left_data.bitmap(); let batch = self.state.try_as_process_probe_batch()?; - match self.batch_transformer.next() { - None => { - // Setting up timer & updating input metrics - self.join_metrics.input_batches.add(1); - self.join_metrics.input_rows.add(batch.num_rows()); - let timer = self.join_metrics.join_time.timer(); - - let result = join_left_and_right_batch( - left_data.batch(), - batch, - self.join_type, - self.filter.as_ref(), - &self.column_indices, - &self.schema, - visited_left_side, - &mut self.indices_cache, - self.right_side_ordered, - ); - timer.done(); + let binding = self.join_metrics.join_time.clone(); + let _timer = binding.timer(); - self.batch_transformer.set_batch(result?); + if self.join_result_status.is_none() { + let (left_side_indices, right_side_indices) = join_left_and_right_batch( + left_data.batch(), + batch, + self.join_type, + self.filter.as_ref(), + 
visited_left_side, + &mut self.indices_cache, + self.right_side_ordered, + self.intermediate_batch_size, + )?; + self.join_result_status = Some(JoinResultProgress { + build_indices: left_side_indices, + probe_indices: right_side_indices, + processed_count: 0, + }) + } + + let join_result = self.get_next_join_result()?; + + match join_result { + Some(res) => { + self.join_metrics.output_batches.add(1); + Ok(StatefulStreamResult::Ready(Some(res))) + } + None => { + self.state = NestedLoopJoinStreamState::FetchProbeBatch; + self.join_result_status = None; Ok(StatefulStreamResult::Continue) } - Some((batch, last)) => { - if last { - self.state = NestedLoopJoinStreamState::FetchProbeBatch; - } + } + } + + fn build_unmatched_output( + &mut self, + ) -> Result>> { + let binding = self.join_metrics.join_time.clone(); + let _timer = binding.timer(); + let res = self.get_next_join_result()?; + match res { + Some(res) => { self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); - Ok(StatefulStreamResult::Ready(Some(batch))) + Ok(StatefulStreamResult::Ready(Some(res))) + } + None => { + self.state = NestedLoopJoinStreamState::Completed; + Ok(StatefulStreamResult::Ready(None)) } } } - /// Processes unmatched build-side rows for certain join types and produces - /// output batch, updates state to `Completed`. - fn process_unmatched_build_batch( + /// This function's primary purpose is to handle the final output stage required by specific join types after all right-side (probe) data has been exhausted. + /// It is critically important for LEFT*/FULL joins, which must emit left-side (build) rows that found no match. For these cases, it identifies the unmatched rows and prepares the necessary state to output them. 
+ fn prepare_unmatched_output_indices( &mut self, ) -> Result>> { let Some(left_data) = self.left_data.clone() else { @@ -941,29 +1096,21 @@ impl NestedLoopJoinStream { }; // Only setting up timer, input is exhausted - let timer = self.join_metrics.join_time.timer(); + let _timer = self.join_metrics.join_time.timer(); // use the global left bitmap to produce the left indices and right indices let (left_side, right_side) = get_final_indices_from_shared_bitmap(visited_left_side, self.join_type); - let empty_right_batch = RecordBatch::new_empty(self.outer_table.schema()); - // use the left and right indices to produce the batch result - let result = build_batch_from_indices( - &self.schema, - left_data.batch(), - &empty_right_batch, - &left_side, - &right_side, - &self.column_indices, - JoinSide::Left, - ); - self.state = NestedLoopJoinStreamState::Completed; - // Recording time - if result.is_ok() { - timer.done(); - } + self.join_result_status = Some(JoinResultProgress { + build_indices: left_side, + probe_indices: right_side, + processed_count: 0, + }); + self.state = NestedLoopJoinStreamState::OutputUnmatchedBuildRows( + RecordBatch::new_empty(self.outer_table.schema()), + ); - Ok(StatefulStreamResult::Ready(Some(result?))) + Ok(StatefulStreamResult::Continue) } else { // end of the join loop self.state = NestedLoopJoinStreamState::Completed; @@ -978,20 +1125,23 @@ fn join_left_and_right_batch( right_batch: &RecordBatch, join_type: JoinType, filter: Option<&JoinFilter>, - column_indices: &[ColumnIndex], - schema: &Schema, visited_left_side: &SharedBitmapBuilder, indices_cache: &mut (UInt64Array, UInt32Array), right_side_ordered: bool, -) -> Result { - let (left_side, right_side) = - build_join_indices(left_batch, right_batch, filter, indices_cache).map_err( - |e| { - exec_datafusion_err!( - "Fail to build join indices in NestedLoopJoinExec, error: {e}" - ) - }, - )?; + max_intermediate_batch_size: usize, +) -> Result<(PrimitiveArray, PrimitiveArray)> { + let 
(left_side, right_side) = build_join_indices( + left_batch, + right_batch, + filter, + indices_cache, + max_intermediate_batch_size, + ) + .map_err(|e| { + exec_datafusion_err!( + "Fail to build join indices in NestedLoopJoinExec, error: {e}" + ) + })?; // set the left bitmap // and only full join need the left bitmap @@ -1010,33 +1160,10 @@ fn join_left_and_right_batch( right_side_ordered, )?; - // Switch around the build side and probe side for `JoinType::RightMark` - // because in a RightMark join, we want to mark rows on the right table - // by looking for matches in the left. - if join_type == JoinType::RightMark { - build_batch_from_indices( - schema, - right_batch, - left_batch, - &left_side, - &right_side, - column_indices, - JoinSide::Right, - ) - } else { - build_batch_from_indices( - schema, - left_batch, - right_batch, - &left_side, - &right_side, - column_indices, - JoinSide::Left, - ) - } + Ok((left_side, right_side)) } -impl Stream for NestedLoopJoinStream { +impl Stream for NestedLoopJoinStream { type Item = Result; fn poll_next( @@ -1047,7 +1174,7 @@ impl Stream for NestedLoopJoinStream { } } -impl RecordBatchStream for NestedLoopJoinStream { +impl RecordBatchStream for NestedLoopJoinStream { fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } @@ -1062,7 +1189,7 @@ impl EmbeddedProjection for NestedLoopJoinExec { #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::test::TestMemoryExec; + use crate::test::{assert_join_metrics, TestMemoryExec}; use crate::{ common, expressions::Column, repartition::RepartitionExec, test::build_table_i32, }; @@ -1078,6 +1205,7 @@ pub(crate) mod tests { use datafusion_physical_expr::{Partitioning, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; + use insta::allow_duplicates; use insta::assert_snapshot; use rstest::rstest; @@ -1195,7 +1323,7 @@ pub(crate) mod tests { join_type: &JoinType, join_filter: Option, context: Arc, - ) -> Result<(Vec, Vec)> { + ) 
-> Result<(Vec, Vec, MetricsSet)> { let partition_count = 4; // Redistributing right input @@ -1215,20 +1343,35 @@ pub(crate) mod tests { batches.extend( more_batches .into_iter() + .inspect(|b| { + assert!(b.num_rows() <= context.session_config().batch_size()) + }) .filter(|b| b.num_rows() > 0) .collect::>(), ); } - Ok((columns, batches)) + + let metrics = nested_loop_join.metrics().unwrap(); + + Ok((columns, batches, metrics)) + } + + fn new_task_ctx(batch_size: usize) -> Arc { + let base = TaskContext::default(); + // limit max size of intermediate batch used in nlj to 1 + let cfg = base.session_config().clone().with_batch_size(batch_size); + Arc::new(base.with_session_config(cfg)) } + #[rstest] #[tokio::test] - async fn join_inner_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_inner_with_filter(#[values(1, 2, 16)] batch_size: usize) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); + dbg!(&batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::Inner, @@ -1236,26 +1379,30 @@ pub(crate) mod tests { task_ctx, ) .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+----+----+----+----+ | a1 | b1 | c1 | a2 | b2 | c2 | +----+----+----+----+----+----+ | 5 | 5 | 50 | 2 | 2 | 80 | +----+----+----+----+----+----+ - "#); + "#)); + + assert_join_metrics!(metrics, 1); Ok(()) } + #[rstest] #[tokio::test] - async fn join_left_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_with_filter(#[values(1, 2, 16)] batch_size: usize) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = 
build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::Left, @@ -1264,7 +1411,7 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+----+----+----+ | a1 | b1 | c1 | a2 | b2 | c2 | +----+----+-----+----+----+----+ @@ -1272,19 +1419,22 @@ pub(crate) mod tests { | 5 | 5 | 50 | 2 | 2 | 80 | | 9 | 8 | 90 | | | | +----+----+-----+----+----+----+ - "#); + "#)); + + assert_join_metrics!(metrics, 3); Ok(()) } + #[rstest] #[tokio::test] - async fn join_right_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_with_filter(#[values(1, 2, 16)] batch_size: usize) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::Right, @@ -1293,7 +1443,7 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+----+----+----+-----+ | a1 | b1 | c1 | a2 | b2 | c2 | +----+----+----+----+----+-----+ @@ -1301,19 +1451,22 @@ pub(crate) mod tests { | | | | 12 | 10 | 40 | | 5 | 5 | 50 | 2 | 2 | 80 | +----+----+----+----+----+-----+ - "#); + "#)); + + assert_join_metrics!(metrics, 3); Ok(()) } + #[rstest] #[tokio::test] - async fn join_full_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn 
join_full_with_filter(#[values(1, 2, 16)] batch_size: usize) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::Full, @@ -1322,7 +1475,7 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+----+----+-----+ | a1 | b1 | c1 | a2 | b2 | c2 | +----+----+-----+----+----+-----+ @@ -1332,19 +1485,24 @@ pub(crate) mod tests { | 5 | 5 | 50 | 2 | 2 | 80 | | 9 | 8 | 90 | | | | +----+----+-----+----+----+-----+ - "#); + "#)); + + assert_join_metrics!(metrics, 5); Ok(()) } + #[rstest] #[tokio::test] - async fn join_left_semi_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_semi_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::LeftSemi, @@ -1353,25 +1511,30 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+----+ | a1 | b1 | c1 | +----+----+----+ | 5 | 5 | 50 | +----+----+----+ - "#); + "#)); + + assert_join_metrics!(metrics, 1); Ok(()) } + #[rstest] #[tokio::test] - async fn join_left_anti_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn 
join_left_anti_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::LeftAnti, @@ -1380,26 +1543,31 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+ | a1 | b1 | c1 | +----+----+-----+ | 11 | 8 | 110 | | 9 | 8 | 90 | +----+----+-----+ - "#); + "#)); + + assert_join_metrics!(metrics, 2); Ok(()) } + #[rstest] #[tokio::test] - async fn join_right_semi_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_semi_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::RightSemi, @@ -1408,25 +1576,30 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+----+ | a2 | b2 | c2 | +----+----+----+ | 2 | 2 | 80 | +----+----+----+ - "#); + "#)); + + assert_join_metrics!(metrics, 1); Ok(()) } + #[rstest] #[tokio::test] - async fn join_right_anti_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_anti_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); 
let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::RightAnti, @@ -1435,26 +1608,31 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a2", "b2", "c2"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+ | a2 | b2 | c2 | +----+----+-----+ | 10 | 10 | 100 | | 12 | 10 | 40 | +----+----+-----+ - "#); + "#)); + + assert_join_metrics!(metrics, 2); Ok(()) } + #[rstest] #[tokio::test] - async fn join_left_mark_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_left_mark_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::LeftMark, @@ -1463,7 +1641,7 @@ pub(crate) mod tests { ) .await?; assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+-------+ | a1 | b1 | c1 | mark | +----+----+-----+-------+ @@ -1471,19 +1649,24 @@ pub(crate) mod tests { | 5 | 5 | 50 | true | | 9 | 8 | 90 | false | +----+----+-----+-------+ - "#); + "#)); + + assert_join_metrics!(metrics, 3); Ok(()) } + #[rstest] #[tokio::test] - async fn join_right_mark_with_filter() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); + async fn join_right_mark_with_filter( + #[values(1, 2, 16)] batch_size: usize, + ) -> Result<()> { + let task_ctx = new_task_ctx(batch_size); let left = 
build_left_table(); let right = build_right_table(); let filter = prepare_join_filter(); - let (columns, batches) = multi_partitioned_join_collect( + let (columns, batches, metrics) = multi_partitioned_join_collect( left, right, &JoinType::RightMark, @@ -1493,7 +1676,7 @@ pub(crate) mod tests { .await?; assert_eq!(columns, vec!["a2", "b2", "c2", "mark"]); - assert_snapshot!(batches_to_sort_string(&batches), @r#" + allow_duplicates!(assert_snapshot!(batches_to_sort_string(&batches), @r#" +----+----+-----+-------+ | a2 | b2 | c2 | mark | +----+----+-----+-------+ @@ -1501,7 +1684,9 @@ pub(crate) mod tests { | 12 | 10 | 40 | false | | 2 | 2 | 80 | true | +----+----+-----+-------+ - "#); + "#)); + + assert_join_metrics!(metrics, 3); Ok(()) } @@ -1633,6 +1818,7 @@ pub(crate) mod tests { join_type: JoinType, #[values(1, 100, 1000)] left_batch_size: usize, #[values(1, 100, 1000)] right_batch_size: usize, + #[values(1001, 10000)] batch_size: usize, ) -> Result<()> { let left_columns = generate_columns(3, 1000); let left = build_table( @@ -1682,8 +1868,9 @@ pub(crate) mod tests { assert_eq!(right.options, join.options); } + let task_ctx = new_task_ctx(batch_size); let batches = nested_loop_join - .execute(0, Arc::new(TaskContext::default()))? + .execute(0, task_ctx)? 
.try_collect::>() .await?; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index a8c209a492ba8..9a68322834866 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -41,7 +41,8 @@ use crate::joins::utils::{ JoinOnRef, }; use crate::metrics::{ - Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, SpillMetrics, + BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, + SpillMetrics, }; use crate::projection::{ join_allows_pushdown, join_table_borders, new_join_children, @@ -609,8 +610,8 @@ struct SortMergeJoinMetrics { input_rows: Count, /// Number of batches produced by this operator output_batches: Count, - /// Number of rows produced by this operator - output_rows: Count, + /// Execution metrics + baseline_metrics: BaselineMetrics, /// Peak memory used for buffered data. /// Calculated as sum of peak memory values across partitions peak_mem_used: metrics::Gauge, @@ -627,16 +628,17 @@ impl SortMergeJoinMetrics { let input_rows = MetricBuilder::new(metrics).counter("input_rows", partition); let output_batches = MetricBuilder::new(metrics).counter("output_batches", partition); - let output_rows = MetricBuilder::new(metrics).output_rows(partition); let peak_mem_used = MetricBuilder::new(metrics).gauge("peak_mem_used", partition); let spill_metrics = SpillMetrics::new(metrics, partition); + let baseline_metrics = BaselineMetrics::new(metrics, partition); + Self { join_time, input_batches, input_rows, output_batches, - output_rows, + baseline_metrics, peak_mem_used, spill_metrics, } @@ -2032,7 +2034,9 @@ impl SortMergeJoinStream { let record_batch = concat_batches(&self.schema, &self.staging_output_record_batches.batches)?; self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(record_batch.num_rows()); + self.join_metrics + .baseline_metrics + 
.record_output(record_batch.num_rows()); // If join filter exists, `self.output_size` is not accurate as we don't know the exact // number of rows in the output record batch. If streamed row joined with buffered rows, // once join filter is applied, the number of output rows may be more than 1. @@ -2059,6 +2063,7 @@ impl SortMergeJoinStream { { self.staging_output_record_batches.batches.clear(); } + Ok(record_batch) } diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 677601a12845f..9f5485ee93bde 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -22,8 +22,12 @@ use std::collections::{HashMap, VecDeque}; use std::mem::size_of; use std::sync::Arc; +use crate::joins::join_hash_map::{ + get_matched_indices, get_matched_indices_with_limit_offset, update_from_iter, + JoinHashMapOffset, +}; use crate::joins::utils::{JoinFilter, JoinHashMapType}; -use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder}; +use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder}; use crate::{metrics, ExecutionPlan}; use arrow::array::{ @@ -47,26 +51,49 @@ use hashbrown::HashTable; /// Implementation of `JoinHashMapType` for `PruningJoinHashMap`. impl JoinHashMapType for PruningJoinHashMap { - type NextType = VecDeque; - // Extend with zero fn extend_zero(&mut self, len: usize) { self.next.resize(self.next.len() + len, 0) } - /// Get mutable references to the hash map and the next. - fn get_mut(&mut self) -> (&mut HashTable<(u64, u64)>, &mut Self::NextType) { - (&mut self.map, &mut self.next) + fn update_from_iter<'a>( + &mut self, + iter: Box + Send + 'a>, + deleted_offset: usize, + ) { + let slice: &mut [u64] = self.next.make_contiguous(); + update_from_iter::(&mut self.map, slice, iter, deleted_offset); } - /// Get a reference to the hash map. 
- fn get_map(&self) -> &HashTable<(u64, u64)> { - &self.map + fn get_matched_indices<'a>( + &self, + iter: Box + 'a>, + deleted_offset: Option, + ) -> (Vec, Vec) { + // Flatten the deque + let next: Vec = self.next.iter().copied().collect(); + get_matched_indices::(&self.map, &next, iter, deleted_offset) } - /// Get a reference to the next. - fn get_list(&self) -> &Self::NextType { - &self.next + fn get_matched_indices_with_limit_offset( + &self, + hash_values: &[u64], + limit: usize, + offset: JoinHashMapOffset, + ) -> (Vec, Vec, Option) { + // Flatten the deque + let next: Vec = self.next.iter().copied().collect(); + get_matched_indices_with_limit_offset::( + &self.map, + &next, + hash_values, + limit, + offset, + ) + } + + fn is_empty(&self) -> bool { + self.map.is_empty() } } @@ -659,7 +686,7 @@ pub struct StreamJoinMetrics { /// Number of batches produced by this operator pub(crate) output_batches: metrics::Count, /// Number of rows produced by this operator - pub(crate) output_rows: metrics::Count, + pub(crate) baseline_metrics: BaselineMetrics, } impl StreamJoinMetrics { @@ -686,14 +713,12 @@ impl StreamJoinMetrics { let output_batches = MetricBuilder::new(metrics).counter("output_batches", partition); - let output_rows = MetricBuilder::new(metrics).output_rows(partition); - Self { left, right, output_batches, stream_memory_usage, - output_rows, + baseline_metrics: BaselineMetrics::new(metrics, partition), } } } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 6dbe75cc0ae46..9a8d4cbb66050 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -992,6 +992,7 @@ pub(crate) fn join_with_probe_batch( probe_indices, filter, build_hash_joiner.build_side, + None, )? 
} else { (build_indices, probe_indices) @@ -1108,8 +1109,10 @@ fn lookup_join_hashmap( // (5,1) // // With this approach, the lexicographic order on both the probe side and the build side is preserved. - let (mut matched_probe, mut matched_build) = build_hashmap - .get_matched_indices(hash_values.iter().enumerate().rev(), deleted_offset); + let (mut matched_probe, mut matched_build) = build_hashmap.get_matched_indices( + Box::new(hash_values.iter().enumerate().rev()), + deleted_offset, + ); matched_probe.reverse(); matched_build.reverse(); @@ -1375,8 +1378,10 @@ impl SymmetricHashJoinStream { } Some((batch, _)) => { self.metrics.output_batches.add(1); - self.metrics.output_rows.add(batch.num_rows()); - return Poll::Ready(Some(Ok(batch))); + return self + .metrics + .baseline_metrics + .record_poll(Poll::Ready(Some(Ok(batch)))); } } } diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index c5f7087ac195f..35827d4fcd729 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -17,6 +17,7 @@ //! 
Join related functionality used both on logical and physical plans +use std::cmp::min; use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; @@ -26,22 +27,23 @@ use std::sync::Arc; use std::task::{Context, Poll}; use crate::joins::SharedBitmapBuilder; -use crate::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder}; +use crate::metrics::{self, BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder}; use crate::projection::ProjectionExec; use crate::{ ColumnStatistics, ExecutionPlan, ExecutionPlanProperties, Partitioning, Statistics, }; // compatibility pub use super::join_filter::JoinFilter; -pub use super::join_hash_map::{JoinHashMap, JoinHashMapType}; +pub use super::join_hash_map::JoinHashMapType; pub use crate::joins::{JoinOn, JoinOnRef}; +use arrow::array::BooleanArray; use arrow::array::{ builder::UInt64Builder, downcast_array, new_null_array, Array, ArrowPrimitiveType, BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, RecordBatchOptions, UInt32Array, UInt32Builder, UInt64Array, }; -use arrow::buffer::NullBuffer; +use arrow::buffer::{BooleanBuffer, NullBuffer}; use arrow::compute; use arrow::datatypes::{ ArrowNativeType, Field, Schema, SchemaBuilder, UInt32Type, UInt64Type, @@ -843,24 +845,56 @@ pub(crate) fn apply_join_filter_to_indices( probe_indices: UInt32Array, filter: &JoinFilter, build_side: JoinSide, + max_intermediate_size: Option, ) -> Result<(UInt64Array, UInt32Array)> { if build_indices.is_empty() && probe_indices.is_empty() { return Ok((build_indices, probe_indices)); }; - let intermediate_batch = build_batch_from_indices( - filter.schema(), - build_input_buffer, - probe_batch, - &build_indices, - &probe_indices, - filter.column_indices(), - build_side, - )?; - let filter_result = filter - .expression() - .evaluate(&intermediate_batch)? 
- .into_array(intermediate_batch.num_rows())?; + let filter_result = if let Some(max_size) = max_intermediate_size { + let mut filter_results = + Vec::with_capacity(build_indices.len().div_ceil(max_size)); + + for i in (0..build_indices.len()).step_by(max_size) { + let end = min(build_indices.len(), i + max_size); + let len = end - i; + let intermediate_batch = build_batch_from_indices( + filter.schema(), + build_input_buffer, + probe_batch, + &build_indices.slice(i, len), + &probe_indices.slice(i, len), + filter.column_indices(), + build_side, + )?; + let filter_result = filter + .expression() + .evaluate(&intermediate_batch)? + .into_array(intermediate_batch.num_rows())?; + filter_results.push(filter_result); + } + + let filter_refs: Vec<&dyn Array> = + filter_results.iter().map(|a| a.as_ref()).collect(); + + compute::concat(&filter_refs)? + } else { + let intermediate_batch = build_batch_from_indices( + filter.schema(), + build_input_buffer, + probe_batch, + &build_indices, + &probe_indices, + filter.column_indices(), + build_side, + )?; + + filter + .expression() + .evaluate(&intermediate_batch)? + .into_array(intermediate_batch.num_rows())? + }; + let mask = as_boolean_array(&filter_result)?; let left_filtered = compute::filter(&build_indices, mask)?; @@ -923,11 +957,61 @@ pub(crate) fn build_batch_from_indices( compute::take(array.as_ref(), probe_indices, None)? } }; + columns.push(array); } Ok(RecordBatch::try_new(Arc::new(schema.clone()), columns)?) } +/// Returns a new [RecordBatch] resulting of a join where the build/left side is empty. +/// The resulting batch has [Schema] `schema`. 
+pub(crate) fn build_batch_empty_build_side( + schema: &Schema, + build_batch: &RecordBatch, + probe_batch: &RecordBatch, + column_indices: &[ColumnIndex], + join_type: JoinType, +) -> Result { + match join_type { + // these join types only return data if the left side is not empty, so we return an + // empty RecordBatch + JoinType::Inner + | JoinType::Left + | JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftAnti + | JoinType::LeftMark => Ok(RecordBatch::new_empty(Arc::new(schema.clone()))), + + // the remaining joins will return data for the right columns and null for the left ones + JoinType::Right | JoinType::Full | JoinType::RightAnti | JoinType::RightMark => { + let num_rows = probe_batch.num_rows(); + let mut columns: Vec> = + Vec::with_capacity(schema.fields().len()); + + for column_index in column_indices { + let array = match column_index.side { + // left -> null array + JoinSide::Left => new_null_array( + build_batch.column(column_index.index).data_type(), + num_rows, + ), + // right -> respective right array + JoinSide::Right => Arc::clone(probe_batch.column(column_index.index)), + // right mark -> unset boolean array as there are no matches on the left side + JoinSide::None => Arc::new(BooleanArray::new( + BooleanBuffer::new_unset(num_rows), + None, + )), + }; + + columns.push(array); + } + + Ok(RecordBatch::try_new(Arc::new(schema.clone()), columns)?) 
+ } + } +} + /// The input is the matched indices for left and right and /// adjust the indices according to the join type pub(crate) fn adjust_indices_by_join_type( @@ -1196,6 +1280,7 @@ fn append_probe_indices_in_order( /// Metrics for build & probe joins #[derive(Clone, Debug)] pub(crate) struct BuildProbeJoinMetrics { + pub(crate) baseline: BaselineMetrics, /// Total time for collecting build-side of join pub(crate) build_time: metrics::Time, /// Number of batches consumed by build-side @@ -1212,12 +1297,31 @@ pub(crate) struct BuildProbeJoinMetrics { pub(crate) input_rows: metrics::Count, /// Number of batches produced by this operator pub(crate) output_batches: metrics::Count, - /// Number of rows produced by this operator - pub(crate) output_rows: metrics::Count, +} + +// This Drop implementation updates the elapsed compute part of the metrics. +// +// Why is this in a Drop? +// - We keep track of build_time and join_time separately, but baseline metrics have +// a total elapsed_compute time. Instead of remembering to update both the metrics +// at the same time, we chose to update elapsed_compute once at the end - summing up +// both the parts. +// +// How does this work? +// - The elapsed_compute `Time` is represented by an `Arc`. So even when +// this `BuildProbeJoinMetrics` is dropped, the elapsed_compute is usable through the +// Arc reference. 
+impl Drop for BuildProbeJoinMetrics { + fn drop(&mut self) { + self.baseline.elapsed_compute().add(&self.build_time); + self.baseline.elapsed_compute().add(&self.join_time); + } } impl BuildProbeJoinMetrics { pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { + let baseline = BaselineMetrics::new(metrics, partition); + let join_time = MetricBuilder::new(metrics).subset_time("join_time", partition); let build_time = MetricBuilder::new(metrics).subset_time("build_time", partition); @@ -1239,8 +1343,6 @@ impl BuildProbeJoinMetrics { let output_batches = MetricBuilder::new(metrics).counter("output_batches", partition); - let output_rows = MetricBuilder::new(metrics).output_rows(partition); - Self { build_time, build_input_batches, @@ -1250,7 +1352,7 @@ impl BuildProbeJoinMetrics { input_batches, input_rows, output_batches, - output_rows, + baseline, } } } diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index a52336108a87f..41d67c2917495 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -169,6 +169,23 @@ impl SpillMetrics { } } +/// Metrics for tracking [`crate::stream::BatchSplitStream`] activity +#[derive(Debug, Clone)] +pub struct SplitMetrics { + /// Number of times an input [`RecordBatch`] was split + pub batches_splitted: Count, +} + +impl SplitMetrics { + /// Create a new [`SplitMetrics`] + pub fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { + Self { + batches_splitted: MetricBuilder::new(metrics) + .counter("batches_splitted", partition), + } + } +} + /// Trait for things that produce output rows as a result of execution. 
pub trait RecordOutput { /// Record that some number of output rows have been produced diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs index 87783eada8b00..0b9b4bed856b8 100644 --- a/datafusion/physical-plan/src/metrics/mod.rs +++ b/datafusion/physical-plan/src/metrics/mod.rs @@ -32,7 +32,7 @@ use std::{ use datafusion_common::HashMap; // public exports -pub use baseline::{BaselineMetrics, RecordOutput, SpillMetrics}; +pub use baseline::{BaselineMetrics, RecordOutput, SpillMetrics, SplitMetrics}; pub use builder::MetricBuilder; pub use custom::CustomMetricValue; pub use value::{Count, Gauge, MetricValue, ScopedTimerGuard, Time, Timestamp}; @@ -387,7 +387,7 @@ impl ExecutionPlanMetricsSet { /// "tags" in /// [InfluxDB](https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/) /// , "attributes" in [open -/// telemetry], +/// telemetry], /// etc. /// /// As the name and value are expected to mostly be constant strings, diff --git a/datafusion/physical-plan/src/metrics/value.rs b/datafusion/physical-plan/src/metrics/value.rs index 1cc4a4fbcb05a..3149fca95ba84 100644 --- a/datafusion/physical-plan/src/metrics/value.rs +++ b/datafusion/physical-plan/src/metrics/value.rs @@ -222,6 +222,15 @@ impl Time { pub fn value(&self) -> usize { self.nanos.load(Ordering::Relaxed) } + + /// Return a scoped guard that adds the amount of time elapsed between the + /// given instant and its drop (or the call to `stop`) to the underlying metric + pub fn timer_with(&self, now: Instant) -> ScopedTimerGuard<'_> { + ScopedTimerGuard { + inner: self, + start: Some(now), + } + } } /// Stores a single timestamp, stored as the number of nanoseconds @@ -331,6 +340,20 @@ impl ScopedTimerGuard<'_> { pub fn done(mut self) { self.stop() } + + /// Stop the timer and record the time elapsed up to the given endpoint.
+ pub fn stop_with(&mut self, end_time: Instant) { + if let Some(start) = self.start.take() { + let elapsed = end_time - start; + self.inner.add_duration(elapsed) + } + } + + /// Stop the timer, record the time elapsed up to the `end_time` endpoint, and + /// consume self. + pub fn done_with(mut self, end_time: Instant) { + self.stop_with(end_time) + } } impl Drop for ScopedTimerGuard<'_> { @@ -841,4 +864,99 @@ mod tests { ); } } + + #[test] + fn test_timer_with_custom_instant() { + let time = Time::new(); + let start_time = Instant::now(); + + // Sleep a bit to ensure some time passes + std::thread::sleep(Duration::from_millis(1)); + + // Create timer with the earlier start time + let mut timer = time.timer_with(start_time); + + // Sleep a bit more + std::thread::sleep(Duration::from_millis(1)); + + // Stop the timer + timer.stop(); + + // The recorded time should be at least 2ms (both sleeps) + assert!( + time.value() >= 2_000_000, + "Expected at least 2ms, got {} ns", + time.value() + ); + } + + #[test] + fn test_stop_with_custom_endpoint() { + let time = Time::new(); + let start = Instant::now(); + let mut timer = time.timer_with(start); + + // Simulate exactly 10ms passing + let end = start + Duration::from_millis(10); + + // Stop with custom endpoint + timer.stop_with(end); + + // Should record exactly 10ms (10_000_000 nanoseconds) + // Allow for small variations due to timer resolution + let recorded = time.value(); + assert!( + (10_000_000..=10_100_000).contains(&recorded), + "Expected ~10ms, got {recorded} ns" + ); + + // Calling stop_with again should not add more time + timer.stop_with(end); + assert_eq!( + recorded, + time.value(), + "Time should not change after second stop" + ); + } + + #[test] + fn test_done_with_custom_endpoint() { + let time = Time::new(); + let start = Instant::now(); + + // Create a new scope for the timer + { + let timer = time.timer_with(start); + + // Simulate 5ms passing + let end = start + Duration::from_millis(5); + + // Call
done_with to stop and consume the timer + timer.done_with(end); + + // Timer is consumed, can't use it anymore + } + + // Should record exactly 5ms + let recorded = time.value(); + assert!( + (5_000_000..=5_100_000).contains(&recorded), + "Expected ~5ms, got {recorded} ns", + ); + + // Test that done_with prevents drop from recording time again + { + let timer2 = time.timer_with(start); + let end2 = start + Duration::from_millis(5); + timer2.done_with(end2); + // drop happens here but should not record additional time + } + + // Should have added only 5ms more + let new_recorded = time.value(); + assert!( + (10_000_000..=10_100_000).contains(&new_recorded), + "Expected ~10ms total, got {new_recorded} ns", + ); + } } diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 6cd581700a88f..e7df79f867d70 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -171,14 +171,18 @@ impl ExecutionPlan for PlaceholderRowExec { } fn partition_statistics(&self, partition: Option) -> Result { - if partition.is_some() { - return Ok(Statistics::new_unknown(&self.schema())); - } - let batch = self + let batches = self .data() .expect("Create single row placeholder RecordBatch should not fail"); + + let batches = match partition { + Some(_) => vec![batches], + // entire plan + None => vec![batches; self.partitions], + }; + Ok(common::compute_record_batch_statistics( - &[batch], + &batches, &self.schema, None, )) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 620bfa2809a90..754a208126eea 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -812,8 +812,7 @@ impl ExecutionPlan for RepartitionExec { parent_filters: Vec>, _config: &ConfigOptions, ) -> Result { - Ok(FilterDescription::new_with_child_count(1) - 
- .all_parent_filters_supported(parent_filters)) + FilterDescription::from_children(parent_filters, &self.children()) } fn handle_child_pushdown_result( @@ -822,9 +821,7 @@ impl ExecutionPlan for RepartitionExec { child_pushdown_result: ChildPushdownResult, _config: &ConfigOptions, ) -> Result>> { - Ok(FilterPushdownPropagation::transparent( - child_pushdown_result, - )) + Ok(FilterPushdownPropagation::if_all(child_pushdown_result)) } } diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index 17033e6a31425..8ab603e04961e 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -16,6 +16,7 @@ // under the License. use std::cmp::Ordering; +use std::sync::Arc; use arrow::array::{ types::ByteArrayType, Array, ArrowPrimitiveType, GenericByteArray, @@ -151,7 +152,7 @@ impl Ord for Cursor { /// Used for sorting when there are multiple columns in the sort key #[derive(Debug)] pub struct RowValues { - rows: Rows, + rows: Arc, /// Tracks for the memory used by in the `Rows` of this /// cursor. Freed on drop @@ -164,7 +165,7 @@ impl RowValues { /// /// Panics if the reservation is not for exactly `rows.size()` /// bytes or if `rows` is empty. - pub fn new(rows: Rows, reservation: MemoryReservation) -> Self { + pub fn new(rows: Arc, reservation: MemoryReservation) -> Self { assert_eq!( rows.size(), reservation.size(), @@ -288,6 +289,120 @@ impl CursorArray for StringViewArray { } } +/// TODO: use the arrow-rs side API once the upstream change is merged and released +/// Builds a 128-bit composite key for an inline value: +/// +/// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting). +/// - Low 32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings +/// (or those with fewer meaningful bytes) always numerically sort before longer ones.
+/// +/// This function extracts the length and the 12-byte inline string data from the raw +/// little-endian `u128` representation, converts them to big-endian ordering, and packs them +/// into a single `u128` value suitable for fast, branchless comparisons. +/// +/// # Why include length? +/// +/// A pure 96-bit content comparison can’t distinguish between two values whose inline bytes +/// compare equal—either because one is a true prefix of the other or because zero-padding +/// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare +/// handles both content and length in one go. +/// +/// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes) +/// +/// | String | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding) | +/// |------------|-----------------------|---------------------------------| +/// | `"bar"` | `03 00 00 00` | `62 61 72` + 9 × `00` | +/// | `"bar\0"`| `04 00 00 00` | `62 61 72 00` + 8 × `00` | +/// +/// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field +/// then differentiates: +/// +/// ```text +/// key("bar") = 0x62617200000000000000000000000003 +/// key("bar\0") = 0x62617200000000000000000000000004 +/// ⇒ key("bar") < key("bar\0") +/// ``` +/// # Inlining and Endianness +/// +/// - We start by calling `.to_le_bytes()` on the `raw` `u128`, because Rust’s native in‑memory +/// representation is little‑endian on x86/ARM. +/// - We extract the low 32 bits numerically (`raw as u32`)—this step is endianness‑free. +/// - We copy the 12 bytes of inline data (original order) into `buf[0..12]`. +/// - We serialize `length` as big‑endian into `buf[12..16]`. +/// - Finally, `u128::from_be_bytes(buf)` treats `buf[0]` as the most significant byte +/// and `buf[15]` as the least significant, producing a `u128` whose integer value +/// directly encodes “inline data then length” in big‑endian form.
+/// +/// This ensures that a simple `u128` comparison is equivalent to the desired +/// lexicographical comparison of the inline bytes followed by length. +#[inline(always)] +pub fn inline_key_fast(raw: u128) -> u128 { + // 1. Decompose `raw` into little‑endian bytes: + // - raw_bytes[0..4] = length in LE + // - raw_bytes[4..16] = inline string data + let raw_bytes = raw.to_le_bytes(); + + // 2. Numerically truncate to get the low 32‑bit length (endianness‑free). + let length = raw as u32; + + // 3. Build a 16‑byte buffer in big‑endian order: + // - buf[0..12] = inline string bytes (in original order) + // - buf[12..16] = length.to_be_bytes() (BE) + let mut buf = [0u8; 16]; + buf[0..12].copy_from_slice(&raw_bytes[4..16]); // inline data + + // Why convert length to big-endian for comparison? + // + // Rust (on most platforms) stores integers in little-endian format, + // meaning the least significant byte is at the lowest memory address. + // For example, an u32 value like 0x22345677 is stored in memory as: + // + // [0x77, 0x56, 0x34, 0x22] // little-endian layout + // ^ ^ ^ ^ + // LSB ↑↑↑ MSB + // + // This layout is efficient for arithmetic but *not* suitable for + // lexicographic (dictionary-style) comparison of byte arrays. + // + // To compare values by byte order—e.g., for sorted keys or binary trees— + // we must convert them to **big-endian**, where: + // + // - The most significant byte (MSB) comes first (index 0) + // - The least significant byte (LSB) comes last (index N-1) + // + // In big-endian, the same u32 = 0x22345677 would be represented as: + // + // [0x22, 0x34, 0x56, 0x77] + // + // This ordering aligns with natural string/byte sorting, so calling + // `.to_be_bytes()` allows us to construct + // keys where standard numeric comparison (e.g., `<`, `>`) behaves + // like lexicographic byte comparison. + buf[12..16].copy_from_slice(&length.to_be_bytes()); // length in BE + + // 4. 
Deserialize the buffer as a big‑endian u128: + // buf[0] is MSB, buf[15] is LSB. + // Details: + // Note on endianness and layout: + // + // Although `buf[0]` is stored at the lowest memory address, + // calling `u128::from_be_bytes(buf)` interprets it as the **most significant byte (MSB)**, + // and `buf[15]` as the **least significant byte (LSB)**. + // + // This is the core principle of **big-endian decoding**: + // - Byte at index 0 maps to bits 127..120 (highest) + // - Byte at index 1 maps to bits 119..112 + // - ... + // - Byte at index 15 maps to bits 7..0 (lowest) + // + // So even though memory layout goes from low to high (left to right), + // big-endian treats the **first byte** as highest in value. + // + // This guarantees that comparing two `u128` keys is equivalent to lexicographically + // comparing the original inline bytes, followed by length. + u128::from_be_bytes(buf) +} + impl CursorValues for StringViewArray { fn len(&self) -> usize { self.views().len() @@ -302,7 +417,7 @@ impl CursorValues for StringViewArray { let r_view = unsafe { r.views().get_unchecked(r_idx) }; if l.data_buffers().is_empty() && r.data_buffers().is_empty() { - return l_view.eq(r_view); + return l_view == r_view; } let l_len = *l_view as u32; @@ -322,12 +437,12 @@ impl CursorValues for StringViewArray { let l_view = unsafe { cursor.views().get_unchecked(idx) }; let r_view = unsafe { cursor.views().get_unchecked(idx - 1) }; if cursor.data_buffers().is_empty() { - return l_view.eq(r_view); + return l_view == r_view; } let l_len = *l_view as u32; - let r_len = *r_view as u32; + if l_len != r_len { return false; } @@ -345,11 +460,7 @@ impl CursorValues for StringViewArray { if l.data_buffers().is_empty() && r.data_buffers().is_empty() { let l_view = unsafe { l.views().get_unchecked(l_idx) }; let r_view = unsafe { r.views().get_unchecked(r_idx) }; - let l_len = *l_view as u32; - let r_len = *r_view as u32; - let l_data = unsafe { StringViewArray::inline_value(l_view, l_len as 
usize) }; - let r_data = unsafe { StringViewArray::inline_value(r_view, r_len as usize) }; - return l_data.cmp(r_data); + return inline_key_fast(*l_view).cmp(&inline_key_fast(*r_view)); } unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) } @@ -444,11 +555,11 @@ impl CursorValues for ArrayValues { #[cfg(test)] mod tests { - use std::sync::Arc; - + use arrow::array::GenericBinaryArray; use datafusion_execution::memory_pool::{ GreedyMemoryPool, MemoryConsumer, MemoryPool, }; + use std::sync::Arc; use super::*; @@ -609,4 +720,100 @@ mod tests { b.advance(); assert_eq!(a.cmp(&b), Ordering::Less); } + + /// Integration tests for `inline_key_fast` covering: + /// + /// 1. Monotonic ordering across increasing lengths and lexical variations. + /// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence. + /// + /// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why + /// the length field is required even when all inline bytes fit in 12 bytes. + /// + /// The test includes strings that verify correct byte order (prevent reversal bugs), + /// and length-based tie-breaking in the composite key. + /// + /// The test confirms that `inline_key_fast` produces keys which sort consistently + /// with the expected lexicographical order of the raw byte arrays. + #[test] + fn test_inline_key_fast_various_lengths_and_lexical() { + /// Helper to create a raw u128 value representing an inline ByteView: + /// - `length`: number of meaningful bytes (must be ≤ 12) + /// - `data`: the actual inline data bytes + /// + /// The first 4 bytes encode length in little-endian, + /// the following 12 bytes contain the inline string data (unpadded). 
+ fn make_raw_inline(length: u32, data: &[u8]) -> u128 { + assert!(length as usize <= 12, "Inline length must be ≤ 12"); + assert!( + data.len() == length as usize, + "Data length must match `length`" + ); + + let mut raw_bytes = [0u8; 16]; + raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // length stored little-endian + raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data + u128::from_le_bytes(raw_bytes) + } + + // Test inputs: various lengths and lexical orders, + // plus special cases for byte order and length tie-breaking + let test_inputs: Vec<&[u8]> = vec![ + b"a", + b"aa", + b"aaa", + b"aab", + b"abcd", + b"abcde", + b"abcdef", + b"abcdefg", + b"abcdefgh", + b"abcdefghi", + b"abcdefghij", + b"abcdefghijk", + b"abcdefghijkl", + // Tests for byte-order reversal bug: + // Without the fix, "backend one" would compare as "eno dnekcab", + // causing incorrect sort order relative to "backend two". + b"backend one", + b"backend two", + // Tests length-tiebreaker logic: + // "bar" (3 bytes) and "bar\0" (4 bytes) have identical inline data, + // so only the length differentiates their ordering. 
+ b"bar", + b"bar\0", + // Additional lexical and length tie-breaking cases with same prefix, in correct lex order: + b"than12Byt", + b"than12Bytes", + b"than12Bytes\0", + b"than12Bytesx", + b"than12Bytex", + b"than12Bytez", + // Additional lexical tests + b"xyy", + b"xyz", + b"xza", + ]; + + // Create a GenericBinaryArray for cross-comparison of lex order + let array: GenericBinaryArray = GenericBinaryArray::from( + test_inputs.iter().map(|s| Some(*s)).collect::>(), + ); + + for i in 0..array.len() - 1 { + let v1 = array.value(i); + let v2 = array.value(i + 1); + + // Assert the array's natural lexical ordering is correct + assert!(v1 < v2, "Array compare failed: {v1:?} !< {v2:?}"); + + // Assert the keys produced by inline_key_fast reflect the same ordering + let key1 = inline_key_fast(make_raw_inline(v1.len() as u32, v1)); + let key2 = inline_key_fast(make_raw_inline(v2.len() as u32, v2)); + + assert!( + key1 < key2, + "Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}", + ); + } + } } diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index 0c18a3b6c7032..ca2d5f2105f22 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -493,13 +493,12 @@ impl SortPreservingMergeStream { if self.enable_round_robin_tie_breaker && cmp_node == 1 { match (&self.cursors[winner], &self.cursors[challenger]) { (Some(ac), Some(bc)) => { - let ord = ac.cmp(bc); - if ord.is_eq() { + if ac == bc { self.handle_tie(cmp_node, &mut winner, challenger); } else { // Ends of tie breaker self.round_robin_tie_breaker_mode = false; - if ord.is_gt() { + if ac > bc { self.update_winner(cmp_node, &mut winner, challenger); } } diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 32b34a75cc763..513081e627e1a 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ 
b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -305,7 +305,7 @@ impl ExecutionPlan for PartialSortExec { input, expr: self.expr.clone(), common_prefix_length: self.common_prefix_length, - in_mem_batches: vec![], + in_mem_batch: RecordBatch::new_empty(Arc::clone(&self.schema())), fetch: self.fetch, is_closed: false, baseline_metrics: BaselineMetrics::new(&self.metrics_set, partition), @@ -334,7 +334,7 @@ struct PartialSortStream { /// should be more than 0 otherwise PartialSort is not applicable common_prefix_length: usize, /// Used as a buffer for part of the input not ready for sort - in_mem_batches: Vec, + in_mem_batch: RecordBatch, /// Fetch top N results fetch: Option, /// Whether the stream has finished returning all of its data or not @@ -375,51 +375,61 @@ impl PartialSortStream { return Poll::Ready(None); } loop { - return Poll::Ready(match ready!(self.input.poll_next_unpin(cx)) { + // Check if we've already reached the fetch limit + if self.fetch == Some(0) { + self.is_closed = true; + return Poll::Ready(None); + } + + match ready!(self.input.poll_next_unpin(cx)) { Some(Ok(batch)) => { - if let Some(slice_point) = - self.get_slice_point(self.common_prefix_length, &batch)? + // Merge new batch into in_mem_batch + self.in_mem_batch = concat_batches( + &self.schema(), + &[self.in_mem_batch.clone(), batch], + )?; + + // Check if we have a slice point, otherwise keep accumulating in `self.in_mem_batch`. + if let Some(slice_point) = self + .get_slice_point(self.common_prefix_length, &self.in_mem_batch)? 
{ - self.in_mem_batches.push(batch.slice(0, slice_point)); - let remaining_batch = - batch.slice(slice_point, batch.num_rows() - slice_point); - // Extract the sorted batch - let sorted_batch = self.sort_in_mem_batches(); - // Refill with the remaining batch - self.in_mem_batches.push(remaining_batch); - - debug_assert!(sorted_batch - .as_ref() - .map(|batch| batch.num_rows() > 0) - .unwrap_or(true)); - Some(sorted_batch) - } else { - self.in_mem_batches.push(batch); - continue; + let sorted = self.in_mem_batch.slice(0, slice_point); + self.in_mem_batch = self.in_mem_batch.slice( + slice_point, + self.in_mem_batch.num_rows() - slice_point, + ); + let sorted_batch = sort_batch(&sorted, &self.expr, self.fetch)?; + if let Some(fetch) = self.fetch.as_mut() { + *fetch -= sorted_batch.num_rows(); + } + + if sorted_batch.num_rows() > 0 { + return Poll::Ready(Some(Ok(sorted_batch))); + } } } - Some(Err(e)) => Some(Err(e)), + Some(Err(e)) => return Poll::Ready(Some(Err(e))), None => { self.is_closed = true; - // once input is consumed, sort the rest of the inserted batches - let remaining_batch = self.sort_in_mem_batches()?; - if remaining_batch.num_rows() > 0 { - Some(Ok(remaining_batch)) + // Once input is consumed, sort the rest of the inserted batches + let remaining_batch = self.sort_in_mem_batch()?; + return if remaining_batch.num_rows() > 0 { + Poll::Ready(Some(Ok(remaining_batch))) } else { - None - } + Poll::Ready(None) + }; } - }); + }; } } /// Returns a sorted RecordBatch from in_mem_batches and clears in_mem_batches /// - /// If fetch is specified for PartialSortStream `sort_in_mem_batches` will limit + /// If fetch is specified for PartialSortStream `sort_in_mem_batch` will limit /// the last RecordBatch returned and will mark the stream as closed - fn sort_in_mem_batches(self: &mut Pin<&mut Self>) -> Result { - let input_batch = concat_batches(&self.schema(), &self.in_mem_batches)?; - self.in_mem_batches.clear(); + fn sort_in_mem_batch(self: &mut Pin<&mut 
Self>) -> Result { + let input_batch = self.in_mem_batch.clone(); + self.in_mem_batch = RecordBatch::new_empty(self.schema()); let result = sort_batch(&input_batch, &self.expr, self.fetch)?; if let Some(remaining_fetch) = self.fetch { // remaining_fetch - result.num_rows() is always be >= 0 @@ -1091,4 +1101,87 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_partial_sort_with_homogeneous_batches() -> Result<()> { + // Test case for the bug where batches with homogeneous sort keys + // (e.g., [1,1,1], [2,2,2]) would not be properly detected as having + // slice points between batches. + let task_ctx = Arc::new(TaskContext::default()); + + // Create batches where each batch has homogeneous values for sort keys + let batch1 = test::build_table_i32( + ("a", &vec![1; 3]), + ("b", &vec![1; 3]), + ("c", &vec![3, 2, 1]), + ); + let batch2 = test::build_table_i32( + ("a", &vec![2; 3]), + ("b", &vec![2; 3]), + ("c", &vec![4, 6, 4]), + ); + let batch3 = test::build_table_i32( + ("a", &vec![3; 3]), + ("b", &vec![3; 3]), + ("c", &vec![9, 7, 8]), + ); + + let schema = batch1.schema(); + let mem_exec = TestMemoryExec::try_new_exec( + &[vec![batch1, batch2, batch3]], + Arc::clone(&schema), + None, + )?; + + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + + // Partial sort with common prefix of 2 (sorting by a, b, c) + let partial_sort_exec = Arc::new(PartialSortExec::new( + [ + PhysicalSortExpr { + expr: col("a", &schema)?, + options: option_asc, + }, + PhysicalSortExpr { + expr: col("b", &schema)?, + options: option_asc, + }, + PhysicalSortExpr { + expr: col("c", &schema)?, + options: option_asc, + }, + ] + .into(), + mem_exec, + 2, + )); + + let result = collect(partial_sort_exec, Arc::clone(&task_ctx)).await?; + + assert_eq!(result.len(), 3,); + + allow_duplicates! 
{ + assert_snapshot!(batches_to_string(&result), @r#" + +---+---+---+ + | a | b | c | + +---+---+---+ + | 1 | 1 | 1 | + | 1 | 1 | 2 | + | 1 | 1 | 3 | + | 2 | 2 | 4 | + | 2 | 2 | 4 | + | 2 | 2 | 6 | + | 3 | 3 | 7 | + | 3 | 3 | 8 | + | 3 | 3 | 9 | + +---+---+---+ + "#); + } + + assert_eq!(task_ctx.runtime_env().memory_pool.reserved(), 0,); + Ok(()) + } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 21f98fd012605..bb572c4315fb8 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -27,7 +27,9 @@ use std::sync::Arc; use crate::common::spawn_buffered; use crate::execution_plan::{Boundedness, CardinalityEffect, EmissionType}; use crate::expressions::PhysicalSortExpr; -use crate::filter_pushdown::{FilterDescription, FilterPushdownPhase}; +use crate::filter_pushdown::{ + ChildFilterDescription, FilterDescription, FilterPushdownPhase, +}; use crate::limit::LimitStream; use crate::metrics::{ BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, SpillMetrics, @@ -1268,19 +1270,20 @@ impl ExecutionPlan for SortExec { config: &datafusion_common::config::ConfigOptions, ) -> Result { if !matches!(phase, FilterPushdownPhase::Post) { - return Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters)); + return FilterDescription::from_children(parent_filters, &self.children()); } + + let mut child = + ChildFilterDescription::from_child(&parent_filters, self.input())?; + if let Some(filter) = &self.filter { if config.optimizer.enable_dynamic_filter_pushdown { - let filter = Arc::clone(filter) as Arc; - return Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters) - .with_self_filter(filter)); + child = + child.with_self_filter(Arc::clone(filter) as Arc); } } - Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters)) + + 
Ok(FilterDescription::new().with_child(child)) } } diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index e029c60b285b6..49e7413122fca 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -21,8 +21,8 @@ use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::array::Array; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use arrow::row::{RowConverter, SortField}; -use datafusion_common::Result; +use arrow::row::{RowConverter, Rows, SortField}; +use datafusion_common::{internal_datafusion_err, Result}; use datafusion_execution::memory_pool::MemoryReservation; use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::stream::{Fuse, StreamExt}; @@ -76,8 +76,40 @@ impl FusedStreams { } } +/// A pair of `Arc` that can be reused +#[derive(Debug)] +struct ReusableRows { + // inner[stream_idx] holds two Arcs: + // at start of a new poll + // .0 is the rows from the previous poll (at start), + // .1 is the one that is being written to + // at end of a poll, .0 will be swapped with .1, + inner: Vec<[Option>; 2]>, +} + +impl ReusableRows { + // return a Rows for writing, + // does not clone if the existing rows can be reused + fn take_next(&mut self, stream_idx: usize) -> Result { + Arc::try_unwrap(self.inner[stream_idx][1].take().unwrap()).map_err(|_| { + internal_datafusion_err!( + "Rows from RowCursorStream is still in use by consumer" + ) + }) + } + // save the Rows + fn save(&mut self, stream_idx: usize, rows: Arc) { + self.inner[stream_idx][1] = Some(Arc::clone(&rows)); + // swap the current with the previous one, so that the next poll can reuse the Rows from the previous poll + let [a, b] = &mut self.inner[stream_idx]; + std::mem::swap(a, b); + } +} + /// A [`PartitionedStream`] that wraps a set of [`SendableRecordBatchStream`] /// and computes [`RowValues`] based on the provided [`PhysicalSortExpr`] +/// Note: the 
stream returns an error if the consumer buffers more than one RowValues (i.e. holds on to two RowValues +/// from the same partition at the same time). #[derive(Debug)] pub struct RowCursorStream { /// Converter to convert output of physical expressions @@ -88,6 +120,9 @@ pub struct RowCursorStream { streams: FusedStreams, /// Tracks the memory used by `converter` reservation: MemoryReservation, + /// Allocated rows for each partition, we keep two to allow for buffering one + /// in the consumer of the stream + rows: ReusableRows, } impl RowCursorStream { @@ -105,26 +140,48 @@ impl RowCursorStream { }) .collect::>>()?; - let streams = streams.into_iter().map(|s| s.fuse()).collect(); + let streams: Vec<_> = streams.into_iter().map(|s| s.fuse()).collect(); let converter = RowConverter::new(sort_fields)?; + let mut rows = Vec::with_capacity(streams.len()); + for _ in &streams { + // Initialize each stream with an empty Rows + rows.push([ + Some(Arc::new(converter.empty_rows(0, 0))), + Some(Arc::new(converter.empty_rows(0, 0))), + ]); + } Ok(Self { converter, reservation, column_expressions: expressions.iter().map(|x| Arc::clone(&x.expr)).collect(), streams: FusedStreams(streams), + rows: ReusableRows { inner: rows }, }) } - fn convert_batch(&mut self, batch: &RecordBatch) -> Result { + fn convert_batch( + &mut self, + batch: &RecordBatch, + stream_idx: usize, + ) -> Result { let cols = self .column_expressions .iter() .map(|expr| expr.evaluate(batch)?.into_array(batch.num_rows())) .collect::>>()?; - let rows = self.converter.convert_columns(&cols)?; + // At this point, ownership of this Rows should be unique + let mut rows = self.rows.take_next(stream_idx)?; + + rows.clear(); + + self.converter.append(&mut rows, &cols)?; self.reservation.try_resize(self.converter.size())?; + let rows = Arc::new(rows); + + self.rows.save(stream_idx, Arc::clone(&rows)); + // track the memory in the newly created Rows. 
let mut rows_reservation = self.reservation.new_empty(); rows_reservation.try_grow(rows.size())?; @@ -146,7 +203,7 @@ impl PartitionedStream for RowCursorStream { ) -> Poll> { Poll::Ready(ready!(self.streams.poll_next(cx, stream_idx)).map(|r| { r.and_then(|batch| { - let cursor = self.convert_batch(&batch)?; + let cursor = self.convert_batch(&batch, stream_idx)?; Ok((cursor, batch)) }) })) diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index 338ac7d048a33..773a0986693a7 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -22,7 +22,9 @@ use std::sync::Arc; use std::task::Context; use std::task::Poll; -use super::metrics::BaselineMetrics; +#[cfg(test)] +use super::metrics::ExecutionPlanMetricsSet; +use super::metrics::{BaselineMetrics, SplitMetrics}; use super::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream}; use crate::displayable; @@ -31,6 +33,7 @@ use datafusion_common::{exec_err, Result}; use datafusion_common_runtime::JoinSet; use datafusion_execution::TaskContext; +use futures::ready; use futures::stream::BoxStream; use futures::{Future, Stream, StreamExt}; use log::debug; @@ -522,6 +525,138 @@ impl Stream for ObservedStream { } } +pin_project! { + /// Stream wrapper that splits large [`RecordBatch`]es into smaller batches. + /// + /// This ensures upstream operators receive batches no larger than + /// `batch_size`, which can improve parallelism when data sources + /// generate very large batches. + /// + /// # Fields + /// + /// - `current_batch`: The batch currently being split, if any + /// - `offset`: Index of the next row to split from `current_batch`. + /// This tracks our position within the current batch being split. 
+ /// + /// # Invariants + /// + /// - `offset` is always ≤ `current_batch.num_rows()` when `current_batch` is `Some` + /// - When `current_batch` is `None`, `offset` is always 0 + /// - `batch_size` is always > 0 +pub struct BatchSplitStream { + #[pin] + input: SendableRecordBatchStream, + schema: SchemaRef, + batch_size: usize, + metrics: SplitMetrics, + current_batch: Option, + offset: usize, + } +} + +impl BatchSplitStream { + /// Create a new [`BatchSplitStream`] + pub fn new( + input: SendableRecordBatchStream, + batch_size: usize, + metrics: SplitMetrics, + ) -> Self { + let schema = input.schema(); + Self { + input, + schema, + batch_size, + metrics, + current_batch: None, + offset: 0, + } + } + + /// Attempt to produce the next sliced batch from the current batch. + /// + /// Returns `Some(batch)` if a slice was produced, `None` if the current batch + /// is exhausted and we need to poll upstream for more data. + fn next_sliced_batch(&mut self) -> Option> { + let batch = self.current_batch.take()?; + + // Assert slice boundary safety - offset should never exceed batch size + debug_assert!( + self.offset <= batch.num_rows(), + "Offset {} exceeds batch size {}", + self.offset, + batch.num_rows() + ); + + let remaining = batch.num_rows() - self.offset; + let to_take = remaining.min(self.batch_size); + let out = batch.slice(self.offset, to_take); + + self.metrics.batches_splitted.add(1); + self.offset += to_take; + if self.offset < batch.num_rows() { + // More data remains in this batch, store it back + self.current_batch = Some(batch); + } else { + // Batch is exhausted, reset offset + // Note: current_batch is already None since we took it at the start + self.offset = 0; + } + Some(Ok(out)) + } + + /// Poll the upstream input for the next batch. + /// + /// Returns the appropriate `Poll` result based on upstream state. + /// Small batches are passed through directly, large batches are stored + /// for slicing and return the first slice immediately. 
+ fn poll_upstream( + &mut self, + cx: &mut Context<'_>, + ) -> Poll>> { + match ready!(self.input.as_mut().poll_next(cx)) { + Some(Ok(batch)) => { + if batch.num_rows() <= self.batch_size { + // Small batch, pass through directly + Poll::Ready(Some(Ok(batch))) + } else { + // Large batch, store for slicing and return first slice + self.current_batch = Some(batch); + // Immediately produce the first slice + match self.next_sliced_batch() { + Some(result) => Poll::Ready(Some(result)), + None => Poll::Ready(None), // Should not happen + } + } + } + Some(Err(e)) => Poll::Ready(Some(Err(e))), + None => Poll::Ready(None), + } + } +} + +impl Stream for BatchSplitStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + // First, try to produce a slice from the current batch + if let Some(result) = self.next_sliced_batch() { + return Poll::Ready(Some(result)); + } + + // No current batch or current batch exhausted, poll upstream + self.poll_upstream(cx) + } +} + +impl RecordBatchStream for BatchSplitStream { + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } +} + #[cfg(test)] mod test { use super::*; @@ -616,6 +751,44 @@ mod test { assert!(stream.next().await.is_none()); } + #[tokio::test] + async fn batch_split_stream_basic_functionality() { + use arrow::array::{Int32Array, RecordBatch}; + use futures::stream::{self, StreamExt}; + + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + + // Create a large batch that should be split + let large_batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Int32Array::from((0..2000).collect::>()))], + ) + .unwrap(); + + // Create a stream with the large batch + let input_stream = stream::iter(vec![Ok(large_batch)]); + let adapter = RecordBatchStreamAdapter::new(Arc::clone(&schema), input_stream); + let batch_stream = Box::pin(adapter) as SendableRecordBatchStream; + + // Create a BatchSplitStream with batch_size 
= 500 + let metrics = ExecutionPlanMetricsSet::new(); + let split_metrics = SplitMetrics::new(&metrics, 0); + let mut split_stream = BatchSplitStream::new(batch_stream, 500, split_metrics); + + let mut total_rows = 0; + let mut batch_count = 0; + + while let Some(result) = split_stream.next().await { + let batch = result.unwrap(); + assert!(batch.num_rows() <= 500, "Batch size should not exceed 500"); + total_rows += batch.num_rows(); + batch_count += 1; + } + + assert_eq!(total_rows, 2000, "All rows should be preserved"); + assert_eq!(batch_count, 4, "Should have 4 batches of 500 rows each"); + } + /// Consumes all the input's partitions into a /// RecordBatchReceiverStream and runs it to completion /// diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index 5e6410a0171ea..349f9955b6914 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -131,7 +131,7 @@ impl ExecutionPlan for TestMemoryExec { } fn as_any(&self) -> &dyn Any { - unimplemented!() + self } fn properties(&self) -> &PlanProperties { @@ -522,3 +522,33 @@ impl PartitionStream for TestPartitionStream { )) } } + +#[cfg(test)] +macro_rules! 
assert_join_metrics { + ($metrics:expr, $expected_rows:expr) => { + assert_eq!($metrics.output_rows().unwrap(), $expected_rows); + + let elapsed_compute = $metrics + .elapsed_compute() + .expect("did not find elapsed_compute metric"); + let join_time = $metrics + .sum_by_name("join_time") + .expect("did not find join_time metric") + .as_usize(); + let build_time = $metrics + .sum_by_name("build_time") + .expect("did not find build_time metric") + .as_usize(); + // ensure join_time and build_time are considered in elapsed_compute + assert!( + join_time + build_time <= elapsed_compute, + "join_time ({}) + build_time ({}) = {} was <= elapsed_compute = {}", + join_time, + build_time, + join_time + build_time, + elapsed_compute + ); + }; +} +#[cfg(test)] +pub(crate) use assert_join_metrics; diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index d851d08a101f8..d3335c0e7fe17 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -261,10 +261,14 @@ impl DisplayAs for BoundedWindowAggExec { .window_expr .iter() .map(|e| { + let field = match e.field() { + Ok(f) => f.to_string(), + Err(e) => format!("{e:?}"), + }; format!( - "{}: {:?}, frame: {:?}", + "{}: {}, frame: {}", e.name().to_owned(), - e.field(), + field, e.get_window_frame() ) }) @@ -1665,7 +1669,7 @@ mod tests { let batches = collect(physical_plan.execute(0, task_ctx)?).await?; let expected = vec![ - "BoundedWindowAggExec: wdw=[last: Ok(Field { name: \"last\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }, nth_value(-1): Ok(Field { name: \"nth_value(-1)\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }, nth_value(-2): Ok(Field { name: \"nth_value(-2)\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted]", + "BoundedWindowAggExec: wdw=[last: Field { name: \"last\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, nth_value(-1): Field { name: \"nth_value(-1)\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, nth_value(-2): Field { name: \"nth_value(-2)\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " DataSourceExec: partitions=1, partition_sizes=[3]", ]; // Get string representation of the plan @@ -1788,7 +1792,7 @@ mod tests { let expected_plan = vec![ "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]@2 as col_2]", - " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", + " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING], mode=[Linear]", " StreamingTableExec: partition_sizes=1, projection=[sn, hash], infinite_source=true, output_ordering=[sn@0 ASC NULLS LAST]", ]; diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 81fc9cceb777d..8cb2726058997 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -55,6 +55,8 @@ message NdJsonFormat { JsonOptions options = 1; } +message ArrowFormat {} + message PrimaryKeyConstraint{ repeated uint64 indices = 1; diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index c3b6686df0054..f35fd15946958 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -1,3 +1,74 @@ +impl serde::Serialize for ArrowFormat { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let len = 0; + let struct_ser = serializer.serialize_struct("datafusion_common.ArrowFormat", 
len)?; + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ArrowFormat { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + Err(serde::de::Error::unknown_field(value, FIELDS)) + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ArrowFormat; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion_common.ArrowFormat") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + while map_.next_key::()?.is_some() { + let _ = map_.next_value::()?; + } + Ok(ArrowFormat { + }) + } + } + deserializer.deserialize_struct("datafusion_common.ArrowFormat", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ArrowOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 411d72af4c624..ac4a9ea4be696 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -45,6 +45,8 @@ pub struct NdJsonFormat { #[prost(message, 
optional, tag = "1")] pub options: ::core::option::Option, } +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct ArrowFormat {} #[derive(Clone, PartialEq, ::prost::Message)] pub struct PrimaryKeyConstraint { #[prost(uint64, repeated, tag = "1")] diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index a1eeabdf87f4a..c95f392a051a5 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -60,4 +60,5 @@ datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-window-common = { workspace = true } doc-comment = { workspace = true } +pretty_assertions = "1.4" tokio = { workspace = true, features = ["rt-multi-thread"] } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 64789f5de0d22..666a8c7d1f0ac 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -98,6 +98,7 @@ message ListingTableScanNode { datafusion_common.ParquetFormat parquet = 11; datafusion_common.AvroFormat avro = 12; datafusion_common.NdJsonFormat json = 15; + datafusion_common.ArrowFormat arrow = 16; } repeated SortExprNodeCollection file_sort_order = 13; } @@ -859,6 +860,7 @@ message PhysicalScalarUdfNode { optional bytes fun_definition = 3; datafusion_common.ArrowType return_type = 4; bool nullable = 5; + string return_field_name = 6; } message PhysicalAggregateExprNode { @@ -870,6 +872,7 @@ message PhysicalAggregateExprNode { bool distinct = 3; bool ignore_nulls = 6; optional bytes fun_definition = 7; + string human_display = 8; } message PhysicalWindowExprNode { diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 411d72af4c624..ac4a9ea4be696 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -45,6 
+45,8 @@ pub struct NdJsonFormat { #[prost(message, optional, tag = "1")] pub options: ::core::option::Option, } +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct ArrowFormat {} #[derive(Clone, PartialEq, ::prost::Message)] pub struct PrimaryKeyConstraint { #[prost(uint64, repeated, tag = "1")] diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 92309ea6a5cbf..c2e6d8ef59654 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -9907,6 +9907,9 @@ impl serde::Serialize for ListingTableScanNode { listing_table_scan_node::FileFormatType::Json(v) => { struct_ser.serialize_field("json", v)?; } + listing_table_scan_node::FileFormatType::Arrow(v) => { + struct_ser.serialize_field("arrow", v)?; + } } } struct_ser.end() @@ -9939,6 +9942,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { "parquet", "avro", "json", + "arrow", ]; #[allow(clippy::enum_variant_names)] @@ -9957,6 +9961,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { Parquet, Avro, Json, + Arrow, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -9992,6 +9997,7 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { "parquet" => Ok(GeneratedField::Parquet), "avro" => Ok(GeneratedField::Avro), "json" => Ok(GeneratedField::Json), + "arrow" => Ok(GeneratedField::Arrow), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -10112,6 +10118,13 @@ impl<'de> serde::Deserialize<'de> for ListingTableScanNode { return Err(serde::de::Error::duplicate_field("json")); } file_format_type__ = map_.next_value::<::std::option::Option<_>>()?.map(listing_table_scan_node::FileFormatType::Json) +; + } + GeneratedField::Arrow => { + if file_format_type__.is_some() { + return Err(serde::de::Error::duplicate_field("arrow")); + } + file_format_type__ = 
map_.next_value::<::std::option::Option<_>>()?.map(listing_table_scan_node::FileFormatType::Arrow) ; } } @@ -13619,6 +13632,9 @@ impl serde::Serialize for PhysicalAggregateExprNode { if self.fun_definition.is_some() { len += 1; } + if !self.human_display.is_empty() { + len += 1; + } if self.aggregate_function.is_some() { len += 1; } @@ -13640,6 +13656,9 @@ impl serde::Serialize for PhysicalAggregateExprNode { #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } + if !self.human_display.is_empty() { + struct_ser.serialize_field("humanDisplay", &self.human_display)?; + } if let Some(v) = self.aggregate_function.as_ref() { match v { physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(v) => { @@ -13665,6 +13684,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "ignoreNulls", "fun_definition", "funDefinition", + "human_display", + "humanDisplay", "user_defined_aggr_function", "userDefinedAggrFunction", ]; @@ -13676,6 +13697,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { Distinct, IgnoreNulls, FunDefinition, + HumanDisplay, UserDefinedAggrFunction, } impl<'de> serde::Deserialize<'de> for GeneratedField { @@ -13703,6 +13725,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { "distinct" => Ok(GeneratedField::Distinct), "ignoreNulls" | "ignore_nulls" => Ok(GeneratedField::IgnoreNulls), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), + "humanDisplay" | "human_display" => Ok(GeneratedField::HumanDisplay), "userDefinedAggrFunction" | "user_defined_aggr_function" => Ok(GeneratedField::UserDefinedAggrFunction), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } @@ -13728,6 +13751,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { let mut distinct__ = None; let mut ignore_nulls__ = None; let mut fun_definition__ = None; + let mut human_display__ = 
None; let mut aggregate_function__ = None; while let Some(k) = map_.next_key()? { match k { @@ -13763,6 +13787,12 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } + GeneratedField::HumanDisplay => { + if human_display__.is_some() { + return Err(serde::de::Error::duplicate_field("humanDisplay")); + } + human_display__ = Some(map_.next_value()?); + } GeneratedField::UserDefinedAggrFunction => { if aggregate_function__.is_some() { return Err(serde::de::Error::duplicate_field("userDefinedAggrFunction")); @@ -13777,6 +13807,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalAggregateExprNode { distinct: distinct__.unwrap_or_default(), ignore_nulls: ignore_nulls__.unwrap_or_default(), fun_definition: fun_definition__, + human_display: human_display__.unwrap_or_default(), aggregate_function: aggregate_function__, }) } @@ -16312,6 +16343,9 @@ impl serde::Serialize for PhysicalScalarUdfNode { if self.nullable { len += 1; } + if !self.return_field_name.is_empty() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.PhysicalScalarUdfNode", len)?; if !self.name.is_empty() { struct_ser.serialize_field("name", &self.name)?; @@ -16330,6 +16364,9 @@ impl serde::Serialize for PhysicalScalarUdfNode { if self.nullable { struct_ser.serialize_field("nullable", &self.nullable)?; } + if !self.return_field_name.is_empty() { + struct_ser.serialize_field("returnFieldName", &self.return_field_name)?; + } struct_ser.end() } } @@ -16347,6 +16384,8 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { "return_type", "returnType", "nullable", + "return_field_name", + "returnFieldName", ]; #[allow(clippy::enum_variant_names)] @@ -16356,6 +16395,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { FunDefinition, ReturnType, Nullable, + ReturnFieldName, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn 
deserialize(deserializer: D) -> std::result::Result @@ -16382,6 +16422,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), "returnType" | "return_type" => Ok(GeneratedField::ReturnType), "nullable" => Ok(GeneratedField::Nullable), + "returnFieldName" | "return_field_name" => Ok(GeneratedField::ReturnFieldName), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -16406,6 +16447,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { let mut fun_definition__ = None; let mut return_type__ = None; let mut nullable__ = None; + let mut return_field_name__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Name => { @@ -16440,6 +16482,12 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { } nullable__ = Some(map_.next_value()?); } + GeneratedField::ReturnFieldName => { + if return_field_name__.is_some() { + return Err(serde::de::Error::duplicate_field("returnFieldName")); + } + return_field_name__ = Some(map_.next_value()?); + } } } Ok(PhysicalScalarUdfNode { @@ -16448,6 +16496,7 @@ impl<'de> serde::Deserialize<'de> for PhysicalScalarUdfNode { fun_definition: fun_definition__, return_type: return_type__, nullable: nullable__.unwrap_or_default(), + return_field_name: return_field_name__.unwrap_or_default(), }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b0fc0ce60436d..35491366dcac6 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -123,7 +123,10 @@ pub struct ListingTableScanNode { pub target_partitions: u32, #[prost(message, repeated, tag = "13")] pub file_sort_order: ::prost::alloc::vec::Vec, - #[prost(oneof = "listing_table_scan_node::FileFormatType", tags = "10, 11, 12, 15")] + #[prost( + oneof = "listing_table_scan_node::FileFormatType", + tags = "10, 11, 12, 15, 16" + )] pub file_format_type: 
::core::option::Option< listing_table_scan_node::FileFormatType, >, @@ -140,6 +143,8 @@ pub mod listing_table_scan_node { Avro(super::super::datafusion_common::AvroFormat), #[prost(message, tag = "15")] Json(super::super::datafusion_common::NdJsonFormat), + #[prost(message, tag = "16")] + Arrow(super::super::datafusion_common::ArrowFormat), } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -1305,6 +1310,8 @@ pub struct PhysicalScalarUdfNode { pub return_type: ::core::option::Option, #[prost(bool, tag = "5")] pub nullable: bool, + #[prost(string, tag = "6")] + pub return_field_name: ::prost::alloc::string::String, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct PhysicalAggregateExprNode { @@ -1318,6 +1325,8 @@ pub struct PhysicalAggregateExprNode { pub ignore_nulls: bool, #[prost(bytes = "vec", optional, tag = "7")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, + #[prost(string, tag = "8")] + pub human_display: ::prost::alloc::string::String, #[prost(oneof = "physical_aggregate_expr_node::AggregateFunction", tags = "4")] pub aggregate_function: ::core::option::Option< physical_aggregate_expr_node::AggregateFunction, diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs index 2df162f21e3a3..b4d72aa1b6cb3 100644 --- a/datafusion/proto/src/lib.rs +++ b/datafusion/proto/src/lib.rs @@ -130,8 +130,9 @@ pub mod protobuf { pub use crate::generated::datafusion::*; pub use datafusion_proto_common::common::proto_error; pub use datafusion_proto_common::protobuf_common::{ - ArrowOptions, ArrowType, AvroFormat, AvroOptions, CsvFormat, DfSchema, - EmptyMessage, Field, JoinSide, NdJsonFormat, ParquetFormat, ScalarValue, Schema, + ArrowFormat, ArrowOptions, ArrowType, AvroFormat, AvroOptions, CsvFormat, + DfSchema, EmptyMessage, Field, JoinSide, NdJsonFormat, ParquetFormat, + ScalarValue, Schema, }; pub use datafusion_proto_common::{FromProtoError, ToProtoError}; } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs index 66ef0ebfe3610..6c5b348698c75 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -576,10 +576,7 @@ pub fn parse_expr( parse_exprs(&pb.args, registry, codec)?, pb.distinct, parse_optional_expr(pb.filter.as_deref(), registry, codec)?.map(Box::new), - match pb.order_by.len() { - 0 => None, - _ => Some(parse_sorts(&pb.order_by, registry, codec)?), - }, + parse_sorts(&pb.order_by, registry, codec)?, None, ))) } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 1acf1ee27bfef..576a51707c963 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -35,6 +35,7 @@ use crate::{ use crate::protobuf::{proto_error, ToProtoError}; use arrow::datatypes::{DataType, Schema, SchemaBuilder, SchemaRef}; use datafusion::datasource::cte_worktable::CteWorkTable; +use datafusion::datasource::file_format::arrow::ArrowFormat; #[cfg(feature = "avro")] use datafusion::datasource::file_format::avro::AvroFormat; #[cfg(feature = "parquet")] @@ -71,8 +72,7 @@ use datafusion_expr::{ Statement, WindowUDF, }; use datafusion_expr::{ - AggregateUDF, ColumnUnnestList, DmlStatement, FetchType, RecursiveQuery, SkipType, - TableSource, Unnest, + AggregateUDF, DmlStatement, FetchType, RecursiveQuery, SkipType, TableSource, Unnest, }; use self::to_proto::{serialize_expr, serialize_exprs}; @@ -440,13 +440,16 @@ impl AsLogicalPlan for LogicalPlanNode { } #[cfg_attr(not(feature = "avro"), allow(unused_variables))] FileFormatType::Avro(..) => { - #[cfg(feature = "avro")] + #[cfg(feature = "avro")] { Arc::new(AvroFormat) } #[cfg(not(feature = "avro"))] panic!("Unable to process avro file since `avro` feature is not enabled"); } + FileFormatType::Arrow(..) 
=> { + Arc::new(ArrowFormat) + } }; let table_paths = &scan @@ -905,51 +908,24 @@ impl AsLogicalPlan for LogicalPlanNode { extension_codec.try_decode_file_format(©.file_type, ctx)?, ); - Ok(LogicalPlan::Copy(dml::CopyTo { - input: Arc::new(input), - output_url: copy.output_url.clone(), - partition_by: copy.partition_by.clone(), + Ok(LogicalPlan::Copy(dml::CopyTo::new( + Arc::new(input), + copy.output_url.clone(), + copy.partition_by.clone(), file_type, - options: Default::default(), - })) + Default::default(), + ))) } LogicalPlanType::Unnest(unnest) => { let input: LogicalPlan = into_logical_plan!(unnest.input, ctx, extension_codec)?; - Ok(LogicalPlan::Unnest(Unnest { - input: Arc::new(input), - exec_columns: unnest.exec_columns.iter().map(|c| c.into()).collect(), - list_type_columns: unnest - .list_type_columns - .iter() - .map(|c| { - let recursion_item = c.recursion.as_ref().unwrap(); - ( - c.input_index as _, - ColumnUnnestList { - output_column: recursion_item - .output_column - .as_ref() - .unwrap() - .into(), - depth: recursion_item.depth as _, - }, - ) - }) - .collect(), - struct_type_columns: unnest - .struct_type_columns - .iter() - .map(|c| *c as usize) - .collect(), - dependency_indices: unnest - .dependency_indices - .iter() - .map(|c| *c as usize) - .collect(), - schema: Arc::new(convert_required!(unnest.schema)?), - options: into_required!(unnest.options)?, - })) + + LogicalPlanBuilder::from(input) + .unnest_columns_with_options( + unnest.exec_columns.iter().map(|c| c.into()).collect(), + into_required!(unnest.options)?, + )? 
+ .build() } LogicalPlanType::RecursiveQuery(recursive_query_node) => { let static_term = recursive_query_node @@ -1085,13 +1061,18 @@ impl AsLogicalPlan for LogicalPlanNode { Some(FileFormatType::Avro(protobuf::AvroFormat {})) } + if any.is::() { + maybe_some_type = + Some(FileFormatType::Arrow(protobuf::ArrowFormat {})) + } + if let Some(file_format_type) = maybe_some_type { file_format_type } else { return Err(proto_error(format!( - "Error converting file format, {:?} is invalid as a datafusion format.", - listing_table.options().format - ))); + "Error deserializing unknown file format: {:?}", + listing_table.options().format + ))); } }; diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index b14ad7aadf583..43afaa0fbe655 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -374,10 +374,7 @@ pub fn serialize_expr( Some(e) => Some(Box::new(serialize_expr(e.as_ref(), codec)?)), None => None, }, - order_by: match order_by { - Some(e) => serialize_sorts(e, codec)?, - None => vec![], - }, + order_by: serialize_sorts(order_by, codec)?, fun_definition: (!buf.is_empty()).then_some(buf), }, ))), diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 1c60470b2218f..a42bf38044170 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -45,6 +45,8 @@ use datafusion::physical_plan::expressions::{ }; use datafusion::physical_plan::windows::{create_window_expr, schema_add_window_field}; use datafusion::physical_plan::{Partitioning, PhysicalExpr, WindowExpr}; +use datafusion::prelude::SessionContext; +use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_proto_common::common::proto_error; @@ -72,12 +74,12 @@ impl From<&protobuf::PhysicalColumn> for Column { /// * 
`codec` - An extension codec used to decode custom UDFs. pub fn parse_physical_sort_expr( proto: &protobuf::PhysicalSortExprNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result { if let Some(expr) = &proto.expr { - let expr = parse_physical_expr(expr.as_ref(), registry, input_schema, codec)?; + let expr = parse_physical_expr(expr.as_ref(), ctx, input_schema, codec)?; let options = SortOptions { descending: !proto.asc, nulls_first: proto.nulls_first, @@ -99,15 +101,13 @@ pub fn parse_physical_sort_expr( /// * `codec` - An extension codec used to decode custom UDFs. pub fn parse_physical_sort_exprs( proto: &[protobuf::PhysicalSortExprNode], - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { proto .iter() - .map(|sort_expr| { - parse_physical_sort_expr(sort_expr, registry, input_schema, codec) - }) + .map(|sort_expr| parse_physical_sort_expr(sort_expr, ctx, input_schema, codec)) .collect() } @@ -123,17 +123,15 @@ pub fn parse_physical_sort_exprs( /// * `codec` - An extension codec used to decode custom UDFs. 
pub fn parse_physical_window_expr( proto: &protobuf::PhysicalWindowExprNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let window_node_expr = - parse_physical_exprs(&proto.args, registry, input_schema, codec)?; + let window_node_expr = parse_physical_exprs(&proto.args, ctx, input_schema, codec)?; let partition_by = - parse_physical_exprs(&proto.partition_by, registry, input_schema, codec)?; + parse_physical_exprs(&proto.partition_by, ctx, input_schema, codec)?; - let order_by = - parse_physical_sort_exprs(&proto.order_by, registry, input_schema, codec)?; + let order_by = parse_physical_sort_exprs(&proto.order_by, ctx, input_schema, codec)?; let window_frame = proto .window_frame @@ -152,13 +150,13 @@ pub fn parse_physical_window_expr( protobuf::physical_window_expr_node::WindowFunction::UserDefinedAggrFunction(udaf_name) => { WindowFunctionDefinition::AggregateUDF(match &proto.fun_definition { Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, - None => registry.udaf(udaf_name).or_else(|_| codec.try_decode_udaf(udaf_name, &[]))?, + None => ctx.udaf(udaf_name).or_else(|_| codec.try_decode_udaf(udaf_name, &[]))?, }) } protobuf::physical_window_expr_node::WindowFunction::UserDefinedWindowFunction(udwf_name) => { WindowFunctionDefinition::WindowUDF(match &proto.fun_definition { Some(buf) => codec.try_decode_udwf(udwf_name, buf)?, - None => registry.udwf(udwf_name).or_else(|_| codec.try_decode_udwf(udwf_name, &[]))? + None => ctx.udwf(udwf_name).or_else(|_| codec.try_decode_udwf(udwf_name, &[]))? 
}) } } @@ -184,7 +182,7 @@ pub fn parse_physical_window_expr( pub fn parse_physical_exprs<'a, I>( protos: I, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result>> @@ -193,7 +191,7 @@ where { protos .into_iter() - .map(|p| parse_physical_expr(p, registry, input_schema, codec)) + .map(|p| parse_physical_expr(p, ctx, input_schema, codec)) .collect::>>() } @@ -208,7 +206,7 @@ where /// * `codec` - An extension codec used to decode custom UDFs. pub fn parse_physical_expr( proto: &protobuf::PhysicalExprNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -227,7 +225,7 @@ pub fn parse_physical_expr( ExprType::BinaryExpr(binary_expr) => Arc::new(BinaryExpr::new( parse_required_physical_expr( binary_expr.l.as_deref(), - registry, + ctx, "left", input_schema, codec, @@ -235,7 +233,7 @@ pub fn parse_physical_expr( logical_plan::from_proto::from_proto_binary_op(&binary_expr.op)?, parse_required_physical_expr( binary_expr.r.as_deref(), - registry, + ctx, "right", input_schema, codec, @@ -257,7 +255,7 @@ pub fn parse_physical_expr( ExprType::IsNullExpr(e) => { Arc::new(IsNullExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -266,7 +264,7 @@ pub fn parse_physical_expr( ExprType::IsNotNullExpr(e) => { Arc::new(IsNotNullExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -274,7 +272,7 @@ pub fn parse_physical_expr( } ExprType::NotExpr(e) => Arc::new(NotExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -282,7 +280,7 @@ pub fn parse_physical_expr( ExprType::Negative(e) => { Arc::new(NegativeExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -291,19 +289,19 @@ pub fn parse_physical_expr( 
ExprType::InList(e) => in_list( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, )?, - parse_physical_exprs(&e.list, registry, input_schema, codec)?, + parse_physical_exprs(&e.list, ctx, input_schema, codec)?, &e.negated, input_schema, )?, ExprType::Case(e) => Arc::new(CaseExpr::try_new( e.expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| parse_physical_expr(e.as_ref(), ctx, input_schema, codec)) .transpose()?, e.when_then_expr .iter() @@ -311,14 +309,14 @@ pub fn parse_physical_expr( Ok(( parse_required_physical_expr( e.when_expr.as_ref(), - registry, + ctx, "when_expr", input_schema, codec, )?, parse_required_physical_expr( e.then_expr.as_ref(), - registry, + ctx, "then_expr", input_schema, codec, @@ -328,13 +326,13 @@ pub fn parse_physical_expr( .collect::>>()?, e.else_expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| parse_physical_expr(e.as_ref(), ctx, input_schema, codec)) .transpose()?, )?), ExprType::Cast(e) => Arc::new(CastExpr::new( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -345,7 +343,7 @@ pub fn parse_physical_expr( ExprType::TryCast(e) => Arc::new(TryCastExpr::new( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -355,20 +353,31 @@ pub fn parse_physical_expr( ExprType::ScalarUdf(e) => { let udf = match &e.fun_definition { Some(buf) => codec.try_decode_udf(&e.name, buf)?, - None => registry + None => ctx .udf(e.name.as_str()) .or_else(|_| codec.try_decode_udf(&e.name, &[]))?, }; let scalar_fun_def = Arc::clone(&udf); - let args = parse_physical_exprs(&e.args, registry, input_schema, codec)?; + let args = parse_physical_exprs(&e.args, ctx, input_schema, codec)?; + let config_options = + match ctx.state().execution_props().config_options.as_ref() { + Some(config_options) => Arc::clone(config_options), 
+ None => Arc::new(ConfigOptions::default()), + }; Arc::new( ScalarFunctionExpr::new( e.name.as_str(), scalar_fun_def, args, - Field::new("f", convert_required!(e.return_type)?, true).into(), + Field::new( + &e.return_field_name, + convert_required!(e.return_type)?, + true, + ) + .into(), + config_options, ) .with_nullable(e.nullable), ) @@ -378,14 +387,14 @@ pub fn parse_physical_expr( like_expr.case_insensitive, parse_required_physical_expr( like_expr.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, )?, parse_required_physical_expr( like_expr.pattern.as_deref(), - registry, + ctx, "pattern", input_schema, codec, @@ -395,7 +404,7 @@ pub fn parse_physical_expr( let inputs: Vec> = extension .inputs .iter() - .map(|e| parse_physical_expr(e, registry, input_schema, codec)) + .map(|e| parse_physical_expr(e, ctx, input_schema, codec)) .collect::>()?; (codec.try_decode_expr(extension.expr.as_slice(), &inputs)?) as _ } @@ -406,12 +415,12 @@ pub fn parse_physical_expr( fn parse_required_physical_expr( expr: Option<&protobuf::PhysicalExprNode>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, field: &str, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { - expr.map(|e| parse_physical_expr(e, registry, input_schema, codec)) + expr.map(|e| parse_physical_expr(e, ctx, input_schema, codec)) .transpose()? 
.ok_or_else(|| { DataFusionError::Internal(format!("Missing required field {field:?}")) @@ -420,18 +429,14 @@ fn parse_required_physical_expr( pub fn parse_protobuf_hash_partitioning( partitioning: Option<&protobuf::PhysicalHashRepartition>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { match partitioning { Some(hash_part) => { - let expr = parse_physical_exprs( - &hash_part.hash_expr, - registry, - input_schema, - codec, - )?; + let expr = + parse_physical_exprs(&hash_part.hash_expr, ctx, input_schema, codec)?; Ok(Some(Partitioning::Hash( expr, @@ -444,7 +449,7 @@ pub fn parse_protobuf_hash_partitioning( pub fn parse_protobuf_partitioning( partitioning: Option<&protobuf::Partitioning>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -458,7 +463,7 @@ pub fn parse_protobuf_partitioning( Some(protobuf::partitioning::PartitionMethod::Hash(hash_repartition)) => { parse_protobuf_hash_partitioning( Some(hash_repartition), - registry, + ctx, input_schema, codec, ) @@ -482,7 +487,7 @@ pub fn parse_protobuf_file_scan_schema( pub fn parse_protobuf_file_scan_config( proto: &protobuf::FileScanExecConf, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, codec: &dyn PhysicalExtensionCodec, file_source: Arc, ) -> Result { @@ -530,7 +535,7 @@ pub fn parse_protobuf_file_scan_config( for node_collection in &proto.output_ordering { let sort_exprs = parse_physical_sort_exprs( &node_collection.physical_sort_expr_nodes, - registry, + ctx, &schema, codec, )?; diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 242b36786d078..29de8ac96b4d0 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -39,7 +39,7 @@ use crate::protobuf::{ use crate::{convert_required, into_required}; use 
datafusion::arrow::compute::SortOptions; -use datafusion::arrow::datatypes::SchemaRef; +use datafusion::arrow::datatypes::{Schema, SchemaRef}; use datafusion::datasource::file_format::csv::CsvSink; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::file_format::json::JsonSink; @@ -90,6 +90,7 @@ use datafusion_common::config::TableParquetOptions; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use datafusion::prelude::SessionContext; use prost::bytes::BufMut; use prost::Message; @@ -118,7 +119,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { fn try_into_physical_plan( &self, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -130,204 +131,159 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { match plan { PhysicalPlanType::Explain(explain) => self.try_into_explain_physical_plan( explain, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Projection(projection) => self .try_into_projection_physical_plan( projection, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Filter(filter) => self.try_into_filter_physical_plan( - filter, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::CsvScan(scan) => self.try_into_csv_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::JsonScan(scan) => self.try_into_json_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Filter(filter) => { + self.try_into_filter_physical_plan(filter, ctx, runtime, extension_codec) + } + PhysicalPlanType::CsvScan(scan) => { + self.try_into_csv_scan_physical_plan(scan, ctx, runtime, extension_codec) + } + PhysicalPlanType::JsonScan(scan) => { + self.try_into_json_scan_physical_plan(scan, ctx, runtime, 
extension_codec) + } #[cfg_attr(not(feature = "parquet"), allow(unused_variables))] PhysicalPlanType::ParquetScan(scan) => self - .try_into_parquet_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + .try_into_parquet_scan_physical_plan(scan, ctx, runtime, extension_codec), #[cfg_attr(not(feature = "avro"), allow(unused_variables))] - PhysicalPlanType::AvroScan(scan) => self.try_into_avro_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::AvroScan(scan) => { + self.try_into_avro_scan_physical_plan(scan, ctx, runtime, extension_codec) + } PhysicalPlanType::CoalesceBatches(coalesce_batches) => self .try_into_coalesce_batches_physical_plan( coalesce_batches, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Merge(merge) => self.try_into_merge_physical_plan( - merge, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Merge(merge) => { + self.try_into_merge_physical_plan(merge, ctx, runtime, extension_codec) + } PhysicalPlanType::Repartition(repart) => self .try_into_repartition_physical_plan( repart, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::GlobalLimit(limit) => self .try_into_global_limit_physical_plan( limit, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::LocalLimit(limit) => self - .try_into_local_limit_physical_plan( - limit, - registry, - runtime, - extension_codec, - ), + .try_into_local_limit_physical_plan(limit, ctx, runtime, extension_codec), PhysicalPlanType::Window(window_agg) => self.try_into_window_physical_plan( window_agg, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Aggregate(hash_agg) => self .try_into_aggregate_physical_plan( hash_agg, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::HashJoin(hashjoin) => self .try_into_hash_join_physical_plan( hashjoin, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::SymmetricHashJoin(sym_join) => self 
.try_into_symmetric_hash_join_physical_plan( sym_join, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Union(union) => self.try_into_union_physical_plan( - union, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Union(union) => { + self.try_into_union_physical_plan(union, ctx, runtime, extension_codec) + } PhysicalPlanType::Interleave(interleave) => self .try_into_interleave_physical_plan( interleave, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::CrossJoin(crossjoin) => self .try_into_cross_join_physical_plan( crossjoin, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Empty(empty) => self.try_into_empty_physical_plan( - empty, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Empty(empty) => { + self.try_into_empty_physical_plan(empty, ctx, runtime, extension_codec) + } PhysicalPlanType::PlaceholderRow(placeholder) => self .try_into_placeholder_row_physical_plan( placeholder, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Sort(sort) => { - self.try_into_sort_physical_plan(sort, registry, runtime, extension_codec) + self.try_into_sort_physical_plan(sort, ctx, runtime, extension_codec) } PhysicalPlanType::SortPreservingMerge(sort) => self .try_into_sort_preserving_merge_physical_plan( sort, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Extension(extension) => self .try_into_extension_physical_plan( extension, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::NestedLoopJoin(join) => self .try_into_nested_loop_join_physical_plan( join, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Analyze(analyze) => self.try_into_analyze_physical_plan( analyze, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::JsonSink(sink) => self.try_into_json_sink_physical_plan( - sink, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::CsvSink(sink) => self.try_into_csv_sink_physical_plan( - sink, 
- registry, + ctx, runtime, extension_codec, ), + PhysicalPlanType::JsonSink(sink) => { + self.try_into_json_sink_physical_plan(sink, ctx, runtime, extension_codec) + } + PhysicalPlanType::CsvSink(sink) => { + self.try_into_csv_sink_physical_plan(sink, ctx, runtime, extension_codec) + } #[cfg_attr(not(feature = "parquet"), allow(unused_variables))] PhysicalPlanType::ParquetSink(sink) => self - .try_into_parquet_sink_physical_plan( - sink, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::Unnest(unnest) => self.try_into_unnest_physical_plan( - unnest, - registry, - runtime, - extension_codec, - ), + .try_into_parquet_sink_physical_plan(sink, ctx, runtime, extension_codec), + PhysicalPlanType::Unnest(unnest) => { + self.try_into_unnest_physical_plan(unnest, ctx, runtime, extension_codec) + } PhysicalPlanType::Cooperative(cooperative) => self .try_into_cooperative_physical_plan( cooperative, - registry, + ctx, runtime, extension_codec, ), @@ -559,7 +515,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_explain_physical_plan( &self, explain: &protobuf::ExplainExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -577,12 +533,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_projection_physical_plan( &self, projection: &protobuf::ProjectionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&projection.input, registry, runtime, extension_codec)?; + into_physical_plan(&projection.input, ctx, runtime, extension_codec)?; let exprs = projection .expr .iter() @@ -591,7 +547,7 @@ impl protobuf::PhysicalPlanNode { Ok(( parse_physical_expr( expr, - registry, + ctx, input.schema().as_ref(), extension_codec, )?, @@ -605,22 +561,41 @@ impl protobuf::PhysicalPlanNode { fn try_into_filter_physical_plan( &self, filter: 
&protobuf::FilterExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&filter.input, registry, runtime, extension_codec)?; + into_physical_plan(&filter.input, ctx, runtime, extension_codec)?; + let projection = if !filter.projection.is_empty() { + Some( + filter + .projection + .iter() + .map(|i| *i as usize) + .collect::>(), + ) + } else { + None + }; + + // Use the projected schema if projection is present, otherwise use the full schema + let predicate_schema = if let Some(ref proj_indices) = projection { + // Create projected schema for parsing the predicate + let projected_fields: Vec<_> = proj_indices + .iter() + .map(|&i| input.schema().field(i).clone()) + .collect(); + Arc::new(Schema::new(projected_fields)) + } else { + input.schema() + }; + let predicate = filter .expr .as_ref() .map(|expr| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) + parse_physical_expr(expr, ctx, predicate_schema.as_ref(), extension_codec) }) .transpose()? 
.ok_or_else(|| { @@ -629,17 +604,6 @@ impl protobuf::PhysicalPlanNode { ) })?; let filter_selectivity = filter.default_filter_selectivity.try_into(); - let projection = if !filter.projection.is_empty() { - Some( - filter - .projection - .iter() - .map(|i| *i as usize) - .collect::>(), - ) - } else { - None - }; let filter = FilterExec::try_new(predicate, input)?.with_projection(projection)?; match filter_selectivity { @@ -655,7 +619,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_csv_scan_physical_plan( &self, scan: &protobuf::CsvScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -689,7 +653,7 @@ impl protobuf::PhysicalPlanNode { let conf = FileScanConfigBuilder::from(parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, source, )?) @@ -702,13 +666,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_json_scan_physical_plan( &self, scan: &protobuf::JsonScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let scan_conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, Arc::new(JsonSource::new()), )?; @@ -719,7 +683,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_parquet_scan_physical_plan( &self, scan: &protobuf::ParquetScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -727,11 +691,31 @@ impl protobuf::PhysicalPlanNode { { let schema = parse_protobuf_file_scan_schema(scan.base_conf.as_ref().unwrap())?; + + // Check if there's a projection and use projected schema for predicate parsing + let base_conf = scan.base_conf.as_ref().unwrap(); + let predicate_schema = if !base_conf.projection.is_empty() { + // Create projected schema for parsing the 
predicate + let projected_fields: Vec<_> = base_conf + .projection + .iter() + .map(|&i| schema.field(i as usize).clone()) + .collect(); + Arc::new(Schema::new(projected_fields)) + } else { + schema + }; + let predicate = scan .predicate .as_ref() .map(|expr| { - parse_physical_expr(expr, registry, schema.as_ref(), extension_codec) + parse_physical_expr( + expr, + ctx, + predicate_schema.as_ref(), + extension_codec, + ) }) .transpose()?; let mut options = TableParquetOptions::default(); @@ -745,8 +729,8 @@ impl protobuf::PhysicalPlanNode { source = source.with_predicate(predicate); } let base_config = parse_protobuf_file_scan_config( - scan.base_conf.as_ref().unwrap(), - registry, + base_conf, + ctx, extension_codec, Arc::new(source), )?; @@ -760,7 +744,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_avro_scan_physical_plan( &self, scan: &protobuf::AvroScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -768,7 +752,7 @@ impl protobuf::PhysicalPlanNode { { let conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, Arc::new(AvroSource::new()), )?; @@ -781,16 +765,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_coalesce_batches_physical_plan( &self, coalesce_batches: &protobuf::CoalesceBatchesExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input: Arc = into_physical_plan( - &coalesce_batches.input, - registry, - runtime, - extension_codec, - )?; + let input: Arc = + into_physical_plan(&coalesce_batches.input, ctx, runtime, extension_codec)?; Ok(Arc::new( CoalesceBatchesExec::new(input, coalesce_batches.target_batch_size as usize) .with_fetch(coalesce_batches.fetch.map(|f| f as usize)), @@ -800,12 +780,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_merge_physical_plan( &self, merge: 
&protobuf::CoalescePartitionsExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&merge.input, registry, runtime, extension_codec)?; + into_physical_plan(&merge.input, ctx, runtime, extension_codec)?; Ok(Arc::new( CoalescePartitionsExec::new(input) .with_fetch(merge.fetch.map(|f| f as usize)), @@ -815,15 +795,15 @@ impl protobuf::PhysicalPlanNode { fn try_into_repartition_physical_plan( &self, repart: &protobuf::RepartitionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&repart.input, registry, runtime, extension_codec)?; + into_physical_plan(&repart.input, ctx, runtime, extension_codec)?; let partitioning = parse_protobuf_partitioning( repart.partitioning.as_ref(), - registry, + ctx, input.schema().as_ref(), extension_codec, )?; @@ -836,12 +816,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_global_limit_physical_plan( &self, limit: &protobuf::GlobalLimitExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + into_physical_plan(&limit.input, ctx, runtime, extension_codec)?; let fetch = if limit.fetch >= 0 { Some(limit.fetch as usize) } else { @@ -857,24 +837,24 @@ impl protobuf::PhysicalPlanNode { fn try_into_local_limit_physical_plan( &self, limit: &protobuf::LocalLimitExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + into_physical_plan(&limit.input, ctx, runtime, extension_codec)?; 
Ok(Arc::new(LocalLimitExec::new(input, limit.fetch as usize))) } fn try_into_window_physical_plan( &self, window_agg: &protobuf::WindowAggExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&window_agg.input, registry, runtime, extension_codec)?; + into_physical_plan(&window_agg.input, ctx, runtime, extension_codec)?; let input_schema = input.schema(); let physical_window_expr: Vec> = window_agg @@ -883,7 +863,7 @@ impl protobuf::PhysicalPlanNode { .map(|window_expr| { parse_physical_window_expr( window_expr, - registry, + ctx, input_schema.as_ref(), extension_codec, ) @@ -894,12 +874,7 @@ impl protobuf::PhysicalPlanNode { .partition_keys .iter() .map(|expr| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) }) .collect::>>>()?; @@ -932,12 +907,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_aggregate_physical_plan( &self, hash_agg: &protobuf::AggregateExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&hash_agg.input, registry, runtime, extension_codec)?; + into_physical_plan(&hash_agg.input, ctx, runtime, extension_codec)?; let mode = protobuf::AggregateMode::try_from(hash_agg.mode).map_err(|_| { proto_error(format!( "Received a AggregateNode message with unknown AggregateMode {}", @@ -961,13 +936,8 @@ impl protobuf::PhysicalPlanNode { .iter() .zip(hash_agg.group_expr_name.iter()) .map(|(expr, name)| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) - .map(|expr| (expr, name.to_string())) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) + .map(|expr| (expr, name.to_string())) }) .collect::, _>>()?; @@ 
-976,13 +946,8 @@ impl protobuf::PhysicalPlanNode { .iter() .zip(hash_agg.group_expr_name.iter()) .map(|(expr, name)| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) - .map(|expr| (expr, name.to_string())) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) + .map(|expr| (expr, name.to_string())) }) .collect::, _>>()?; @@ -1010,12 +975,7 @@ impl protobuf::PhysicalPlanNode { expr.expr .as_ref() .map(|e| { - parse_physical_expr( - e, - registry, - &physical_schema, - extension_codec, - ) + parse_physical_expr(e, ctx, &physical_schema, extension_codec) }) .transpose() }) @@ -1038,7 +998,7 @@ impl protobuf::PhysicalPlanNode { .map(|e| { parse_physical_expr( e, - registry, + ctx, &physical_schema, extension_codec, ) @@ -1050,7 +1010,7 @@ impl protobuf::PhysicalPlanNode { .map(|e| { parse_physical_sort_expr( e, - registry, + ctx, &physical_schema, extension_codec, ) @@ -1064,17 +1024,16 @@ impl protobuf::PhysicalPlanNode { let agg_udf = match &agg_node.fun_definition { Some(buf) => extension_codec .try_decode_udaf(udaf_name, buf)?, - None => { - registry.udaf(udaf_name).or_else(|_| { - extension_codec - .try_decode_udaf(udaf_name, &[]) - })? 
- } + None => ctx.udaf(udaf_name).or_else(|_| { + extension_codec + .try_decode_udaf(udaf_name, &[]) + })?, }; AggregateExprBuilder::new(agg_udf, input_phy_expr) .schema(Arc::clone(&physical_schema)) .alias(name) + .human_display(agg_node.human_display.clone()) .with_ignore_nulls(agg_node.ignore_nulls) .with_distinct(agg_node.distinct) .order_by(order_bys) @@ -1116,14 +1075,14 @@ impl protobuf::PhysicalPlanNode { fn try_into_hash_join_physical_plan( &self, hashjoin: &protobuf::HashJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&hashjoin.left, registry, runtime, extension_codec)?; + into_physical_plan(&hashjoin.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&hashjoin.right, registry, runtime, extension_codec)?; + into_physical_plan(&hashjoin.right, ctx, runtime, extension_codec)?; let left_schema = left.schema(); let right_schema = right.schema(); let on: Vec<(PhysicalExprRef, PhysicalExprRef)> = hashjoin @@ -1132,13 +1091,13 @@ impl protobuf::PhysicalPlanNode { .map(|col| { let left = parse_physical_expr( &col.left.clone().unwrap(), - registry, + ctx, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), - registry, + ctx, right_schema.as_ref(), extension_codec, )?; @@ -1173,7 +1132,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1234,14 +1193,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_symmetric_hash_join_physical_plan( &self, sym_join: &protobuf::SymmetricHashJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let left = - 
into_physical_plan(&sym_join.left, registry, runtime, extension_codec)?; - let right = - into_physical_plan(&sym_join.right, registry, runtime, extension_codec)?; + let left = into_physical_plan(&sym_join.left, ctx, runtime, extension_codec)?; + let right = into_physical_plan(&sym_join.right, ctx, runtime, extension_codec)?; let left_schema = left.schema(); let right_schema = right.schema(); let on = sym_join @@ -1250,13 +1207,13 @@ impl protobuf::PhysicalPlanNode { .map(|col| { let left = parse_physical_expr( &col.left.clone().unwrap(), - registry, + ctx, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), - registry, + ctx, right_schema.as_ref(), extension_codec, )?; @@ -1291,7 +1248,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1316,7 +1273,7 @@ impl protobuf::PhysicalPlanNode { let left_sort_exprs = parse_physical_sort_exprs( &sym_join.left_sort_exprs, - registry, + ctx, &left_schema, extension_codec, )?; @@ -1324,7 +1281,7 @@ impl protobuf::PhysicalPlanNode { let right_sort_exprs = parse_physical_sort_exprs( &sym_join.right_sort_exprs, - registry, + ctx, &right_schema, extension_codec, )?; @@ -1364,17 +1321,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_union_physical_plan( &self, union: &protobuf::UnionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let mut inputs: Vec> = vec![]; for input in &union.inputs { - inputs.push(input.try_into_physical_plan( - registry, - runtime, - extension_codec, - )?); + inputs.push(input.try_into_physical_plan(ctx, runtime, extension_codec)?); } Ok(Arc::new(UnionExec::new(inputs))) } @@ -1382,17 +1335,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_interleave_physical_plan( &self, 
interleave: &protobuf::InterleaveExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let mut inputs: Vec> = vec![]; for input in &interleave.inputs { - inputs.push(input.try_into_physical_plan( - registry, - runtime, - extension_codec, - )?); + inputs.push(input.try_into_physical_plan(ctx, runtime, extension_codec)?); } Ok(Arc::new(InterleaveExec::try_new(inputs)?)) } @@ -1400,21 +1349,21 @@ impl protobuf::PhysicalPlanNode { fn try_into_cross_join_physical_plan( &self, crossjoin: &protobuf::CrossJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&crossjoin.left, registry, runtime, extension_codec)?; + into_physical_plan(&crossjoin.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&crossjoin.right, registry, runtime, extension_codec)?; + into_physical_plan(&crossjoin.right, ctx, runtime, extension_codec)?; Ok(Arc::new(CrossJoinExec::new(left, right))) } fn try_into_empty_physical_plan( &self, empty: &protobuf::EmptyExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -1425,7 +1374,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_placeholder_row_physical_plan( &self, placeholder: &protobuf::PlaceholderRowExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -1436,11 +1385,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_sort_physical_plan( &self, sort: &protobuf::SortExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sort.input, registry, runtime, 
extension_codec)?; + let input = into_physical_plan(&sort.input, ctx, runtime, extension_codec)?; let exprs = sort .expr .iter() @@ -1461,7 +1410,7 @@ impl protobuf::PhysicalPlanNode { })? .as_ref(); Ok(PhysicalSortExpr { - expr: parse_physical_expr(expr, registry, input.schema().as_ref(), extension_codec)?, + expr: parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec)?, options: SortOptions { descending: !sort_expr.asc, nulls_first: sort_expr.nulls_first, @@ -1488,11 +1437,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_sort_preserving_merge_physical_plan( &self, sort: &protobuf::SortPreservingMergeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sort.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sort.input, ctx, runtime, extension_codec)?; let exprs = sort .expr .iter() @@ -1515,7 +1464,7 @@ impl protobuf::PhysicalPlanNode { Ok(PhysicalSortExpr { expr: parse_physical_expr( expr, - registry, + ctx, input.schema().as_ref(), extension_codec, )?, @@ -1541,18 +1490,18 @@ impl protobuf::PhysicalPlanNode { fn try_into_extension_physical_plan( &self, extension: &protobuf::PhysicalExtensionNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let inputs: Vec> = extension .inputs .iter() - .map(|i| i.try_into_physical_plan(registry, runtime, extension_codec)) + .map(|i| i.try_into_physical_plan(ctx, runtime, extension_codec)) .collect::>()?; let extension_node = - extension_codec.try_decode(extension.node.as_slice(), &inputs, registry)?; + extension_codec.try_decode(extension.node.as_slice(), &inputs, ctx)?; Ok(extension_node) } @@ -1560,14 +1509,14 @@ impl protobuf::PhysicalPlanNode { fn try_into_nested_loop_join_physical_plan( &self, join: &protobuf::NestedLoopJoinExecNode, - registry: 
&dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&join.left, registry, runtime, extension_codec)?; + into_physical_plan(&join.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&join.right, registry, runtime, extension_codec)?; + into_physical_plan(&join.right, ctx, runtime, extension_codec)?; let join_type = protobuf::JoinType::try_from(join.join_type).map_err(|_| { proto_error(format!( "Received a NestedLoopJoinExecNode message with unknown JoinType {}", @@ -1588,7 +1537,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1634,12 +1583,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_analyze_physical_plan( &self, analyze: &protobuf::AnalyzeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&analyze.input, registry, runtime, extension_codec)?; + into_physical_plan(&analyze.input, ctx, runtime, extension_codec)?; Ok(Arc::new(AnalyzeExec::new( analyze.verbose, analyze.show_statistics, @@ -1651,11 +1600,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_json_sink_physical_plan( &self, sink: &protobuf::JsonSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: JsonSink = sink .sink @@ -1669,7 +1618,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + 
ctx, &sink_schema, extension_codec, ) @@ -1689,11 +1638,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_csv_sink_physical_plan( &self, sink: &protobuf::CsvSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: CsvSink = sink .sink @@ -1707,7 +1656,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + ctx, &sink_schema, extension_codec, ) @@ -1727,14 +1676,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_parquet_sink_physical_plan( &self, sink: &protobuf::ParquetSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { #[cfg(feature = "parquet")] { - let input = - into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: ParquetSink = sink .sink @@ -1748,7 +1696,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + ctx, &sink_schema, extension_codec, ) @@ -1771,12 +1719,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_unnest_physical_plan( &self, unnest: &protobuf::UnnestExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = - into_physical_plan(&unnest.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&unnest.input, ctx, runtime, extension_codec)?; Ok(Arc::new(UnnestExec::new( input, @@ -1797,12 +1744,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_cooperative_physical_plan( 
&self, field_stream: &protobuf::CooperativeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input = - into_physical_plan(&field_stream.input, registry, runtime, extension_codec)?; + into_physical_plan(&field_stream.input, ctx, runtime, extension_codec)?; Ok(Arc::new(CooperativeExec::new(input))) } @@ -2819,7 +2766,7 @@ pub trait AsExecutionPlan: Debug + Send + Sync + Clone { fn try_into_physical_plan( &self, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result>; @@ -2909,12 +2856,12 @@ impl PhysicalExtensionCodec for DefaultPhysicalExtensionCodec { fn into_physical_plan( node: &Option>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result, DataFusionError> { if let Some(field) = node { - field.try_into_physical_plan(registry, runtime, extension_codec) + field.try_into_physical_plan(ctx, runtime, extension_codec) } else { Err(proto_error("Missing required field in protobuf")) } diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index d22a0b545161e..85ced4933a438 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use arrow::datatypes::Schema; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetSink; use datafusion::datasource::physical_plan::FileSink; @@ -69,6 +70,7 @@ pub fn serialize_physical_aggr_expr( distinct: aggr_expr.is_distinct(), ignore_nulls: aggr_expr.ignore_nulls(), fun_definition: (!buf.is_empty()).then_some(buf), + human_display: aggr_expr.human_display().to_string(), }, )), }) @@ -351,6 +353,10 @@ pub fn serialize_physical_expr( fun_definition: (!buf.is_empty()).then_some(buf), 
return_type: Some(expr.return_type().try_into()?), nullable: expr.nullable(), + return_field_name: expr + .return_field(&Schema::empty())? + .name() + .to_string(), }, )), }) diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index 4c7da2768e744..ab08f5b9be924 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -20,13 +20,14 @@ use datafusion::logical_expr::ColumnarValue; use datafusion_common::plan_err; use datafusion_expr::function::AccumulatorArgs; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarFunctionArgs, ScalarUDFImpl, - Signature, Volatility, WindowUDFImpl, + udf_equals_hash, Accumulator, AggregateUDFImpl, PartitionEvaluator, + ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use std::any::Any; use std::fmt::Debug; +use std::hash::{DefaultHasher, Hash, Hasher}; mod roundtrip_logical_plan; mod roundtrip_physical_plan; @@ -80,6 +81,8 @@ impl ScalarUDFImpl for MyRegexUdf { fn aliases(&self) -> &[String] { &self.aliases } + + udf_equals_hash!(ScalarUDFImpl); } #[derive(Clone, PartialEq, ::prost::Message)] @@ -123,6 +126,23 @@ impl AggregateUDFImpl for MyAggregateUDF { ) -> datafusion_common::Result> { unimplemented!() } + + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { signature, result } = self; + signature == &other.signature && result == &other.result + } + + fn hash_value(&self) -> u64 { + let Self { signature, result } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + result.hash(&mut hasher); + hasher.finish() + } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -172,6 +192,23 @@ impl WindowUDFImpl 
for CustomUDWF { ) -> datafusion_common::Result { Ok(Field::new(field_args.name(), DataType::UInt64, false).into()) } + + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { + let Some(other) = other.as_any().downcast_ref::() else { + return false; + }; + let Self { signature, payload } = self; + signature == &other.signature && payload == &other.payload + } + + fn hash_value(&self) -> u64 { + let Self { signature, payload } = self; + let mut hasher = DefaultHasher::new(); + std::any::type_name::().hash(&mut hasher); + signature.hash(&mut hasher); + payload.hash(&mut hasher); + hasher.finish() + } } #[derive(Debug)] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 993cc6f87ca30..170c2675f7417 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -28,6 +28,7 @@ use datafusion::datasource::file_format::json::{JsonFormat, JsonFormatFactory}; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; +use datafusion::execution::options::ArrowReadOptions; use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion; use datafusion::optimizer::Optimizer; use datafusion_common::parsers::CompressionTypeVariant; @@ -429,13 +430,13 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> Result<()> { let input = create_csv_scan(&ctx).await?; let file_type = format_as_file_type(Arc::new(CsvFormatFactory::new())); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.csv".to_string(), - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.csv".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - options: Default::default(), - }); + Default::default(), + )); let codec = CsvLogicalExtensionCodec {}; let 
bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; @@ -469,13 +470,13 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { ParquetFormatFactory::new_with_options(parquet_format), )); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.parquet".to_string(), + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.parquet".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], - options: Default::default(), - }); + Default::default(), + )); let codec = ParquetLogicalExtensionCodec {}; let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; @@ -501,13 +502,13 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { let file_type = format_as_file_type(Arc::new(ArrowFormatFactory::new())); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.arrow".to_string(), - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.arrow".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - options: Default::default(), - }); + Default::default(), + )); let codec = ArrowLogicalExtensionCodec {}; let bytes = logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; @@ -548,13 +549,13 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { csv_format.clone(), ))); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.csv".to_string(), - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.csv".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - options: Default::default(), - }); + Default::default(), + )); let codec = CsvLogicalExtensionCodec {}; let bytes = 
logical_plan_to_bytes_with_extension_codec(&plan, &codec)?; @@ -614,13 +615,13 @@ async fn roundtrip_logical_plan_copy_to_json() -> Result<()> { json_format.clone(), ))); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.json".to_string(), - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.json".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - options: Default::default(), - }); + Default::default(), + )); // Assume JsonLogicalExtensionCodec is implemented similarly to CsvLogicalExtensionCodec let codec = JsonLogicalExtensionCodec {}; @@ -686,13 +687,13 @@ async fn roundtrip_logical_plan_copy_to_parquet() -> Result<()> { ParquetFormatFactory::new_with_options(parquet_format.clone()), )); - let plan = LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: "test.parquet".to_string(), - partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], + let plan = LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + "test.parquet".to_string(), + vec!["a".to_string(), "b".to_string(), "c".to_string()], file_type, - options: Default::default(), - }); + Default::default(), + )); // Assume ParquetLogicalExtensionCodec is implemented similarly to JsonLogicalExtensionCodec let codec = ParquetLogicalExtensionCodec {}; @@ -960,8 +961,8 @@ async fn roundtrip_expr_api() -> Result<()> { array_replace_all(make_array(vec![lit(1), lit(2), lit(3)]), lit(2), lit(4)), count(lit(1)), count_distinct(lit(1)), - first_value(lit(1), None), - first_value(lit(1), Some(vec![lit(2).sort(true, true)])), + first_value(lit(1), vec![]), + first_value(lit(1), vec![lit(2).sort(true, true)]), functions_window::nth_value::first_value(lit(1)), functions_window::nth_value::last_value(lit(1)), functions_window::nth_value::nth_value(lit(1), 1), @@ -2181,7 +2182,7 @@ fn roundtrip_aggregate_udf() { vec![lit(1.0_f64)], false, 
Some(Box::new(lit(true))), - None, + vec![], None, )); @@ -2656,3 +2657,16 @@ async fn roundtrip_custom_listing_tables_schema() -> Result<()> { assert_eq!(plan, new_plan); Ok(()) } + +#[tokio::test] +async fn roundtrip_arrow_scan() -> Result<()> { + let ctx = SessionContext::new(); + let plan = ctx + .read_arrow("tests/testdata/test.arrow", ArrowReadOptions::default()) + .await? + .into_optimized_plan()?; + let bytes = logical_plan_to_bytes(&plan)?; + let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; + assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}")); + Ok(()) +} diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 43f9942a0a062..7646e6b9a6d5c 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -18,6 +18,7 @@ use std::any::Any; use std::fmt::{Display, Formatter}; use std::ops::Deref; + use std::sync::Arc; use std::vec; @@ -92,7 +93,7 @@ use datafusion::physical_plan::{ }; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion::scalar::ScalarValue; -use datafusion_common::config::TableParquetOptions; +use datafusion_common::config::{ConfigOptions, TableParquetOptions}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; @@ -140,7 +141,11 @@ fn roundtrip_test_and_return( let result_exec_plan: Arc = proto .try_into_physical_plan(ctx, runtime.deref(), codec) .expect("from proto"); - assert_eq!(format!("{exec_plan:?}"), format!("{result_exec_plan:?}")); + + pretty_assertions::assert_eq!( + format!("{exec_plan:?}"), + format!("{result_exec_plan:?}") + ); Ok(result_exec_plan) } @@ -984,6 +989,7 @@ fn roundtrip_scalar_udf() -> Result<()> { fun_def, vec![col("a", &schema)?], Field::new("f", DataType::Int64, true).into(), + 
Arc::new(ConfigOptions::default()), ); let project = @@ -1112,6 +1118,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], Field::new("f", DataType::Int64, true).into(), + Arc::new(ConfigOptions::default()), )); let filter = Arc::new(FilterExec::try_new( @@ -1214,6 +1221,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], Field::new("f", DataType::Int64, true).into(), + Arc::new(ConfigOptions::default()), )); let udaf = Arc::new(AggregateUDF::from(MyAggregateUDF::new( @@ -1736,3 +1744,168 @@ async fn roundtrip_physical_plan_node() { let _ = plan.execute(0, ctx.task_ctx()).unwrap(); } + +/// Helper function to create a SessionContext with all TPC-H tables registered as external tables +async fn tpch_context() -> Result { + use datafusion_common::test_util::datafusion_test_data; + + let ctx = SessionContext::new(); + let test_data = datafusion_test_data(); + + // TPC-H table names + let tables = [ + "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", + "region", + ]; + + // Create external tables for all TPC-H tables + for table in &tables { + let table_sql = format!( + "CREATE EXTERNAL TABLE {table} STORED AS PARQUET LOCATION '{test_data}/tpch_{table}_small.parquet'" + ); + ctx.sql(&table_sql).await.map_err(|e| { + DataFusionError::External( + format!("Failed to create {table} table: {e}").into(), + ) + })?; + } + + Ok(ctx) +} + +/// Helper function to get TPC-H query SQL +fn get_tpch_query_sql(query: usize) -> Result> { + use std::fs; + + if !(1..=22).contains(&query) { + return Err(DataFusionError::External( + format!("Invalid TPC-H query number: {query}").into(), + )); + } + + let filename = format!("../../benchmarks/queries/q{query}.sql"); + let contents = fs::read_to_string(&filename).map_err(|e| { + DataFusionError::External( + 
format!("Failed to read query file {filename}: {e}").into(), + ) + })?; + + Ok(contents + .split(';') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .collect()) +} + +#[tokio::test] +async fn test_serialize_deserialize_tpch_queries() -> Result<()> { + // Create context with TPC-H tables + let ctx = tpch_context().await?; + + // repeat to run all 22 queries + for query in 1..=22 { + // run all statements in the query + let sql = get_tpch_query_sql(query)?; + for stmt in sql { + let logical_plan = ctx.sql(&stmt).await?.into_unoptimized_plan(); + let optimized_plan = ctx.state().optimize(&logical_plan)?; + let physical_plan = ctx.state().create_physical_plan(&optimized_plan).await?; + + // serialize the physical plan + let codec = DefaultPhysicalExtensionCodec {}; + let proto = + PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?; + + // deserialize the physical plan + let _deserialized_plan = + proto.try_into_physical_plan(&ctx, ctx.runtime_env().as_ref(), &codec)?; + } + } + + Ok(()) +} + +// Bugs: https://github.com/apache/datafusion/issues/16772 +#[tokio::test] +async fn test_round_trip_tpch_queries() -> Result<()> { + // Create context with TPC-H tables + let ctx = tpch_context().await?; + + // repeat to run all 22 queries + for query in 1..=22 { + // run all statements in the query + let sql = get_tpch_query_sql(query)?; + for stmt in sql { + roundtrip_test_sql_with_context(&stmt, &ctx).await?; + } + } + + Ok(()) +} + +// Bug 1 of https://github.com/apache/datafusion/issues/16772 +/// Test that AggregateFunctionExpr human_display field is correctly preserved +/// during serialization/deserialization roundtrip. +/// +/// Test for issue where the human_display field (used for EXPLAIN output) +/// was not being serialized to protobuf, causing it to be lost during roundtrip +/// and resulting in empty or incorrect display strings in query plans. 
+#[tokio::test] +async fn test_round_trip_human_display() -> Result<()> { + // Create context with TPC-H tables + let ctx = tpch_context().await?; + + let sql = "select r_name, count(1) from region group by r_name"; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + let sql = "select r_name, count(*) from region group by r_name"; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + let sql = "select r_name, count(r_name) from region group by r_name"; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + Ok(()) +} + +// Bug 2 of https://github.com/apache/datafusion/issues/16772 +/// Test that PhysicalGroupBy groups field is correctly serialized/deserialized +/// for simple aggregates (no GROUP BY clause). +/// +/// Test for issue where simple aggregates like "SELECT SUM(col1 * col2) FROM table" +/// would incorrectly serialize groups as [[]] instead of [] during roundtrip serialization. +/// The groups field should be empty ([]) when there are no GROUP BY expressions. +#[tokio::test] +async fn test_round_trip_groups_display() -> Result<()> { + // Create context with TPC-H tables + let ctx = tpch_context().await?; + + let sql = "select sum(l_extendedprice * l_discount) as revenue from lineitem;"; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + let sql = "select sum(l_extendedprice) as revenue from lineitem;"; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + Ok(()) +} + +// Bug 3 of https://github.com/apache/datafusion/issues/16772 +/// Test that ScalarFunctionExpr return_field name is correctly preserved +/// during serialization/deserialization roundtrip. +/// +/// Test for issue where the return_field.name for scalar functions +/// was not being serialized to protobuf, causing it to be lost during roundtrip +/// and defaulting to a generic name like "f" instead of the proper function name. 
+#[tokio::test] +async fn test_round_trip_date_part_display() -> Result<()> { + // Create context with TPC-H tables + let ctx = tpch_context().await?; + + let sql = "select extract(year from l_shipdate) as l_year from lineitem "; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + let sql = "select extract(month from l_shipdate) as l_year from lineitem "; + roundtrip_test_sql_with_context(sql, &ctx).await?; + + Ok(()) +} diff --git a/datafusion/proto/tests/testdata/test.arrow b/datafusion/proto/tests/testdata/test.arrow new file mode 100644 index 0000000000000..5314d9eea1345 Binary files /dev/null and b/datafusion/proto/tests/testdata/test.arrow differ diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml index 2c46cac6b7b05..bc7ae380f793f 100644 --- a/datafusion/spark/Cargo.toml +++ b/datafusion/spark/Cargo.toml @@ -37,6 +37,7 @@ name = "datafusion_spark" [dependencies] arrow = { workspace = true } +chrono = { workspace = true } datafusion-catalog = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } @@ -44,3 +45,11 @@ datafusion-expr = { workspace = true } datafusion-functions = { workspace = true, features = ["crypto_expressions"] } datafusion-macros = { workspace = true } log = { workspace = true } + +[dev-dependencies] +criterion = { workspace = true } +rand = { workspace = true } + +[[bench]] +harness = false +name = "char" diff --git a/datafusion/spark/benches/char.rs b/datafusion/spark/benches/char.rs new file mode 100644 index 0000000000000..e30e21f69d183 --- /dev/null +++ b/datafusion/spark/benches/char.rs @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::datatypes::{DataType, Field}; +use arrow::{array::PrimitiveArray, datatypes::Int64Type}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_spark::function::string::char; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::sync::Arc; + +/// Returns fixed seedable RNG +pub fn seedable_rng() -> StdRng { + StdRng::seed_from_u64(42) +} + +fn criterion_benchmark(c: &mut Criterion) { + let cot_fn = char(); + let size = 1024; + let input: PrimitiveArray = { + let null_density = 0.2; + let mut rng = StdRng::seed_from_u64(42); + (0..size) + .map(|_| { + if rng.random::() < null_density { + None + } else { + Some(rng.random_range::(1i64..10_000)) + } + }) + .collect() + }; + let input = Arc::new(input); + let args = vec![ColumnarValue::Array(input)]; + let arg_fields = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); + + c.bench_function("char", |b| { + b.iter(|| { + black_box( + cot_fn + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: size, + return_field: Arc::new(Field::new("f", DataType::Utf8, 
true)), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); +} +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/spark/src/function/datetime/last_day.rs b/datafusion/spark/src/function/datetime/last_day.rs new file mode 100644 index 0000000000000..5a748816f40d9 --- /dev/null +++ b/datafusion/spark/src/function/datetime/last_day.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, AsArray, Date32Array}; +use arrow::datatypes::{DataType, Date32Type}; +use chrono::{Datelike, Duration, NaiveDate}; +use datafusion_common::{exec_datafusion_err, internal_err, Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +}; + +#[derive(Debug)] +pub struct SparkLastDay { + signature: Signature, +} + +impl Default for SparkLastDay { + fn default() -> Self { + Self::new() + } +} + +impl SparkLastDay { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![DataType::Date32], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for SparkLastDay { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "last_day" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Date32) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let ScalarFunctionArgs { args, .. } = args; + let [arg] = args.as_slice() else { + return internal_err!( + "Spark `last_day` function requires 1 argument, got {}", + args.len() + ); + }; + match arg { + ColumnarValue::Scalar(ScalarValue::Date32(days)) => { + if let Some(days) = days { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some( + spark_last_day(*days)?, + )))) + } else { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(None))) + } + } + ColumnarValue::Array(array) => { + let result = match array.data_type() { + DataType::Date32 => { + let result: Date32Array = array + .as_primitive::() + .try_unary(spark_last_day)? 
+ .with_data_type(DataType::Date32); + Ok(Arc::new(result) as ArrayRef) + } + other => { + internal_err!("Unsupported data type {other:?} for Spark function `last_day`") + } + }?; + Ok(ColumnarValue::Array(result)) + } + other => { + internal_err!("Unsupported arg {other:?} for Spark function `last_day") + } + } + } +} + +fn spark_last_day(days: i32) -> Result { + let date = Date32Type::to_naive_date(days); + + let (year, month) = (date.year(), date.month()); + let (next_year, next_month) = if month == 12 { + (year + 1, 1) + } else { + (year, month + 1) + }; + + let first_day_next_month = NaiveDate::from_ymd_opt(next_year, next_month, 1) + .ok_or_else(|| { + exec_datafusion_err!( + "Spark `last_day`: Unable to parse date from {next_year}, {next_month}, 1" + ) + })?; + + Ok(Date32Type::from_naive_date( + first_day_next_month - Duration::days(1), + )) +} diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index a87df9a2c87a0..3bde960ae0120 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -15,11 +15,24 @@ // specific language governing permissions and limitations // under the License. 
+pub mod last_day; + use datafusion_expr::ScalarUDF; +use datafusion_functions::make_udf_function; use std::sync::Arc; -pub mod expr_fn {} +make_udf_function!(last_day::SparkLastDay, last_day); + +pub mod expr_fn { + use datafusion_functions::export_functions; + + export_functions!(( + last_day, + "Returns the last day of the month which the date belongs to.", + arg1 + )); +} pub fn functions() -> Vec> { - vec![] + vec![last_day()] } diff --git a/datafusion/spark/src/function/math/hex.rs b/datafusion/spark/src/function/math/hex.rs index 614d1d4e9ac19..e5a72ea1df288 100644 --- a/datafusion/spark/src/function/math/hex.rs +++ b/datafusion/spark/src/function/math/hex.rs @@ -27,6 +27,7 @@ use arrow::{ array::{as_dictionary_array, as_largestring_array, as_string_array}, datatypes::Int32Type, }; +use datafusion_common::cast::as_string_view_array; use datafusion_common::{ cast::{as_binary_array, as_fixed_size_binary_array, as_int64_array}, exec_err, DataFusionError, @@ -98,12 +99,14 @@ impl ScalarUDFImpl for SparkHex { match &arg_types[0] { DataType::Int64 | DataType::Utf8 + | DataType::Utf8View | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary => Ok(vec![arg_types[0].clone()]), DataType::Dictionary(key_type, value_type) => match value_type.as_ref() { DataType::Int64 | DataType::Utf8 + | DataType::Utf8View | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary => Ok(vec![arg_types[0].clone()]), @@ -212,6 +215,16 @@ pub fn compute_hex( Ok(ColumnarValue::Array(Arc::new(hexed))) } + DataType::Utf8View => { + let array = as_string_view_array(array)?; + + let hexed: StringArray = array + .iter() + .map(|v| v.map(|b| hex_bytes(b, lowercase)).transpose()) + .collect::>()?; + + Ok(ColumnarValue::Array(Arc::new(hexed))) + } DataType::LargeUtf8 => { let array = as_largestring_array(array); diff --git a/datafusion/spark/src/function/string/luhn_check.rs b/datafusion/spark/src/function/string/luhn_check.rs new file mode 100644 index 
0000000000000..07a4a4a41dabf --- /dev/null +++ b/datafusion/spark/src/function/string/luhn_check.rs @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{any::Any, sync::Arc}; + +use arrow::array::{Array, AsArray, BooleanArray}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::Boolean; +use datafusion_common::utils::take_function_args; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; + +/// Spark-compatible `luhn_check` expression +/// +#[derive(Debug)] +pub struct SparkLuhnCheck { + signature: Signature, +} + +impl Default for SparkLuhnCheck { + fn default() -> Self { + Self::new() + } +} + +impl SparkLuhnCheck { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Exact(vec![DataType::Utf8]), + TypeSignature::Exact(vec![DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::LargeUtf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkLuhnCheck { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "luhn_check" + } + + fn signature(&self) -> &Signature { + 
&self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Boolean) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let [array] = take_function_args(self.name(), &args.args)?; + + match array { + ColumnarValue::Array(array) => match array.data_type() { + DataType::Utf8View => { + let str_array = array.as_string_view(); + let values = str_array + .iter() + .map(|s| s.map(luhn_check_impl)) + .collect::(); + Ok(ColumnarValue::Array(Arc::new(values))) + } + DataType::Utf8 => { + let str_array = array.as_string::(); + let values = str_array + .iter() + .map(|s| s.map(luhn_check_impl)) + .collect::(); + Ok(ColumnarValue::Array(Arc::new(values))) + } + DataType::LargeUtf8 => { + let str_array = array.as_string::(); + let values = str_array + .iter() + .map(|s| s.map(luhn_check_impl)) + .collect::(); + Ok(ColumnarValue::Array(Arc::new(values))) + } + other => { + exec_err!("Unsupported data type {other:?} for function `luhn_check`") + } + }, + ColumnarValue::Scalar(ScalarValue::Utf8(Some(s))) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s))) + | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) => Ok( + ColumnarValue::Scalar(ScalarValue::Boolean(Some(luhn_check_impl(s)))), + ), + ColumnarValue::Scalar(ScalarValue::Utf8(None)) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) + | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))) + } + other => { + exec_err!("Unsupported data type {other:?} for function `luhn_check`") + } + } + } +} + +/// Validates a string using the Luhn algorithm. +/// Returns `true` if the input is a valid Luhn number. 
+fn luhn_check_impl(input: &str) -> bool { + let mut sum = 0u32; + let mut alt = false; + let mut digits_processed = 0; + + for b in input.as_bytes().iter().rev() { + let digit = match b { + b'0'..=b'9' => { + digits_processed += 1; + b - b'0' + } + _ => return false, + }; + + let mut val = digit as u32; + if alt { + val *= 2; + if val > 9 { + val -= 9; + } + } + sum += val; + alt = !alt; + } + + digits_processed > 0 && sum % 10 == 0 +} diff --git a/datafusion/spark/src/function/string/mod.rs b/datafusion/spark/src/function/string/mod.rs index 9d5fabe832e92..e45bf4add7721 100644 --- a/datafusion/spark/src/function/string/mod.rs +++ b/datafusion/spark/src/function/string/mod.rs @@ -17,6 +17,7 @@ pub mod ascii; pub mod char; +pub mod luhn_check; use datafusion_expr::ScalarUDF; use datafusion_functions::make_udf_function; @@ -24,6 +25,7 @@ use std::sync::Arc; make_udf_function!(ascii::SparkAscii, ascii); make_udf_function!(char::SparkChar, char); +make_udf_function!(luhn_check::SparkLuhnCheck, luhn_check); pub mod expr_fn { use datafusion_functions::export_functions; @@ -38,8 +40,13 @@ pub mod expr_fn { "Returns the ASCII character having the binary equivalent to col. If col is larger than 256 the result is equivalent to char(col % 256).", arg1 )); + export_functions!(( + luhn_check, + "Returns whether the input string of digits is valid according to the Luhn algorithm.", + arg1 + )); } pub fn functions() -> Vec> { - vec![ascii(), char()] + vec![ascii(), char(), luhn_check()] } diff --git a/datafusion/spark/src/function/utils.rs b/datafusion/spark/src/function/utils.rs index 85af4bb927ca5..0db11e6f1b4ee 100644 --- a/datafusion/spark/src/function/utils.rs +++ b/datafusion/spark/src/function/utils.rs @@ -23,8 +23,9 @@ pub mod test { /// $EXPECTED_TYPE is the expected value type /// $EXPECTED_DATA_TYPE is the expected result type /// $ARRAY_TYPE is the column type after function applied + /// $CONFIG_OPTIONS config options to pass to function macro_rules! 
test_scalar_function { - ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => { + ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident, $CONFIG_OPTIONS:expr) => { let expected: datafusion_common::Result> = $EXPECTED; let func = $FUNC; @@ -72,6 +73,7 @@ pub mod test { number_rows: cardinality, return_field, arg_fields: arg_fields.clone(), + config_options: $CONFIG_OPTIONS, }); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); @@ -101,6 +103,7 @@ pub mod test { number_rows: cardinality, return_field, arg_fields, + config_options: $CONFIG_OPTIONS, }) { Ok(_) => assert!(false, "expected error"), Err(error) => { @@ -111,6 +114,18 @@ pub mod test { } }; }; + + ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => { + test_scalar_function!( + $FUNC, + $ARGS, + $EXPECTED, + $EXPECTED_TYPE, + $EXPECTED_DATA_TYPE, + $ARRAY_TYPE, + std::sync::Arc::new(datafusion_common::config::ConfigOptions::default()) + ) + }; } pub(crate) use test_scalar_function; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index d0cb4263dbd99..e63ca75d019d0 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -93,6 +93,8 @@ struct FunctionArgs { distinct: bool, /// WITHIN GROUP clause, if any within_group: Vec, + /// Was the function called without parenthesis, i.e. could this also be a column reference? 
+ function_without_paranthesis: bool, } impl FunctionArgs { @@ -118,6 +120,7 @@ impl FunctionArgs { null_treatment, distinct: false, within_group, + function_without_paranthesis: matches!(args, FunctionArguments::None), }); }; @@ -199,6 +202,7 @@ impl FunctionArgs { null_treatment, distinct, within_group, + function_without_paranthesis: false, }) } } @@ -212,7 +216,7 @@ impl SqlToRel<'_, S> { ) -> Result { let function_args = FunctionArgs::try_new(function)?; let FunctionArgs { - name, + name: object_name, args, order_by, over, @@ -220,6 +224,7 @@ impl SqlToRel<'_, S> { null_treatment, distinct, within_group, + function_without_paranthesis, } = function_args; if over.is_some() && !within_group.is_empty() { @@ -235,18 +240,18 @@ impl SqlToRel<'_, S> { // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. let is_function_window = over.is_some(); - let sql_parser_span = name.0[0].span(); - let name = if name.0.len() > 1 { + let sql_parser_span = object_name.0[0].span(); + let name = if object_name.0.len() > 1 { // DF doesn't handle compound identifiers // (e.g. "foo.bar") for function names yet - name.to_string() + object_name.to_string() } else { - match name.0[0].as_ident() { + match object_name.0[0].as_ident() { Some(ident) => crate::utils::normalize_ident(ident.clone()), None => { return plan_err!( "Expected an identifier in function name, but found {:?}", - name.0[0] + object_name.0[0] ) } } @@ -406,21 +411,20 @@ impl SqlToRel<'_, S> { .chain(args) .collect::>(); } - (!within_group.is_empty()).then_some(within_group) + within_group } else { let order_by = if !order_by.is_empty() { order_by } else { within_group }; - let order_by = self.order_by_to_sort_expr( + self.order_by_to_sort_expr( order_by, schema, planner_context, true, None, - )?; - (!order_by.is_empty()).then_some(order_by) + )? 
}; let filter: Option> = filter @@ -462,6 +466,31 @@ impl SqlToRel<'_, S> { ))); } } + + // workaround for https://github.com/apache/datafusion-sqlparser-rs/issues/1909 + if function_without_paranthesis { + let maybe_ids = object_name + .0 + .iter() + .map(|part| part.as_ident().cloned().ok_or(())) + .collect::, ()>>(); + if let Ok(ids) = maybe_ids { + if ids.len() == 1 { + return self.sql_identifier_to_expr( + ids.into_iter().next().unwrap(), + schema, + planner_context, + ); + } else { + return self.sql_compound_identifier_to_expr( + ids, + schema, + planner_context, + ); + } + } + } + // Could not find the relevant function, so return an error if let Some(suggested_func_name) = suggest_valid_function(&name, is_function_window, self.context_provider) diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 7c276ce53e35d..434ac50bce507 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -459,8 +459,8 @@ mod test { fn test_form_identifier() -> Result<()> { let err = form_identifier(&[]).expect_err("empty identifiers didn't fail"); let expected = "Internal error: Incorrect number of identifiers: 0.\n\ - This was likely caused by a bug in DataFusion's code and we would \ - welcome that you file an bug report in our issue tracker"; + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this \ + by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues"; assert!(expected.starts_with(&err.strip_backtrace())); let ids = vec!["a".to_string()]; @@ -497,8 +497,8 @@ mod test { ]) .expect_err("too many identifiers didn't fail"); let expected = "Internal error: Incorrect number of identifiers: 5.\n\ - This was likely caused by a bug in DataFusion's code and we would \ - welcome that you file an bug report in our issue tracker"; + This issue was likely caused by a bug in DataFusion's code. 
Please help us to resolve this \ + by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues"; assert!(expected.starts_with(&err.strip_backtrace())); Ok(()) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 1254a1997dbe7..e92869873731f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -21,9 +21,8 @@ use datafusion_expr::planner::{ }; use sqlparser::ast::{ AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType, - DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, - FunctionArguments, MapEntry, StructField, Subscript, TrimWhereField, Value, - ValueWithSpan, + DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry, + StructField, Subscript, TrimWhereField, Value, ValueWithSpan, }; use datafusion_common::{ @@ -477,21 +476,7 @@ impl SqlToRel<'_, S> { ), SQLExpr::Function(function) => { - // workaround for https://github.com/apache/datafusion-sqlparser-rs/issues/1909 - if matches!(function.args, FunctionArguments::None) - && function.name.0.len() > 1 - && function.name.0.iter().all(|part| part.as_ident().is_some()) - { - let ids = function - .name - .0 - .iter() - .map(|part| part.as_ident().expect("just checked").clone()) - .collect(); - self.sql_compound_identifier_to_expr(ids, schema, planner_context) - } else { - self.sql_function_to_expr(function, schema, planner_context) - } + self.sql_function_to_expr(function, schema, planner_context) } SQLExpr::Rollup(exprs) => { diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 9731eebad167d..2c673162ec9c0 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -413,13 +413,18 @@ impl<'a> DFParser<'a> { parser.parse_statements() } + pub fn parse_sql_into_expr(sql: &str) -> Result { + DFParserBuilder::new(sql).build()?.parse_into_expr() + } + pub fn parse_sql_into_expr_with_dialect( sql: &str, dialect: &dyn Dialect, ) -> 
Result { - let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?; - - parser.parse_expr() + DFParserBuilder::new(sql) + .with_dialect(dialect) + .build()? + .parse_into_expr() } /// Parse a sql string into one or [`Statement`]s @@ -465,6 +470,19 @@ impl<'a> DFParser<'a> { ) } + fn expect_token( + &mut self, + expected: &str, + token: Token, + ) -> Result<(), DataFusionError> { + let next_token = self.parser.peek_token_ref(); + if next_token.token != token { + self.expected(expected, next_token.clone()) + } else { + Ok(()) + } + } + /// Parse a new expression pub fn parse_statement(&mut self) -> Result { match self.parser.peek_token().token { @@ -514,6 +532,16 @@ impl<'a> DFParser<'a> { Ok(self.parser.parse_expr_with_alias()?) } + /// Parses the entire SQL string into an expression. + /// + /// In contrast to [`DFParser::parse_expr`], this function will report an error if the input + /// contains any trailing, unparsed tokens. + pub fn parse_into_expr(&mut self) -> Result { + let expr = self.parse_expr()?; + self.expect_token("end of expression", Token::EOF)?; + Ok(expr) + } + /// Helper method to parse a statement and handle errors consistently, especially for recursion limits fn parse_and_handle_statement(&mut self) -> Result { self.parser @@ -521,13 +549,13 @@ impl<'a> DFParser<'a> { .map(|stmt| Statement::Statement(Box::from(stmt))) .map_err(|e| match e { ParserError::RecursionLimitExceeded => DataFusionError::SQL( - ParserError::RecursionLimitExceeded, + Box::new(ParserError::RecursionLimitExceeded), Some(format!( " (current limit: {})", self.options.recursion_limit )), ), - other => DataFusionError::SQL(other, None), + other => DataFusionError::SQL(Box::new(other), None), }) } @@ -1021,7 +1049,7 @@ mod tests { use super::*; use datafusion_common::assert_contains; use sqlparser::ast::Expr::Identifier; - use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; + use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident, ValueWithSpan}; 
use sqlparser::dialect::SnowflakeDialect; use sqlparser::tokenizer::Span; @@ -1783,4 +1811,83 @@ mod tests { "SQL error: RecursionLimitExceeded (current limit: 1)" ); } + + fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) { + let expr = DFParser::parse_sql_into_expr(sql).unwrap(); + assert_eq!(expr, expected, "actual:\n{expr:#?}"); + } + + /// Parses sql and asserts that the expected error message was found + fn expect_parse_expr_error(sql: &str, expected_error: &str) { + match DFParser::parse_sql_into_expr(sql) { + Ok(expr) => { + panic!("Expected parse error for '{sql}', but was successful: {expr:#?}"); + } + Err(e) => { + let error_message = e.to_string(); + assert!( + error_message.contains(expected_error), + "Expected error '{expected_error}' not found in actual error '{error_message}'" + ); + } + } + } + + #[test] + fn literal() { + expect_parse_expr_ok( + "1234", + ExprWithAlias { + expr: Expr::Value(ValueWithSpan::from(Value::Number( + "1234".to_string(), + false, + ))), + alias: None, + }, + ) + } + + #[test] + fn literal_with_alias() { + expect_parse_expr_ok( + "1234 as foo", + ExprWithAlias { + expr: Expr::Value(ValueWithSpan::from(Value::Number( + "1234".to_string(), + false, + ))), + alias: Some(Ident::from("foo")), + }, + ) + } + + #[test] + fn literal_with_alias_and_trailing_tokens() { + expect_parse_expr_error( + "1234 as foo.bar", + "Expected: end of expression, found: .", + ) + } + + #[test] + fn literal_with_alias_and_trailing_whitespace() { + expect_parse_expr_ok( + "1234 as foo ", + ExprWithAlias { + expr: Expr::Value(ValueWithSpan::from(Value::Number( + "1234".to_string(), + false, + ))), + alias: Some(Ident::from("foo")), + }, + ) + } + + #[test] + fn literal_with_alias_and_trailing_whitespace_and_token() { + expect_parse_expr_error( + "1234 as foo bar", + "Expected: end of expression, found: bar", + ) + } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index b50ad1fafda08..26c9826901158 100644 --- 
a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -391,7 +391,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Default expressions are restricted, column references are not allowed let empty_schema = DFSchema::empty(); let error_desc = |e: DataFusionError| match e { - DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _) => { + DataFusionError::SchemaError(ref err, _) + if matches!(**err, SchemaError::FieldNotFound { .. }) => + { plan_datafusion_err!( "Column reference is not allowed in the DEFAULT expression : {}", e @@ -483,13 +485,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } .map_err(|err: DataFusionError| match &err { - DataFusionError::SchemaError( - SchemaError::FieldNotFound { + DataFusionError::SchemaError(inner, _) + if matches!( + inner.as_ref(), + SchemaError::FieldNotFound { .. } + ) => + { + let SchemaError::FieldNotFound { field, valid_fields, - }, - _, - ) => { + } = inner.as_ref() + else { + unreachable!() + }; let mut diagnostic = if let Some(relation) = &col.relation { Diagnostic::new_error( format!( diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index f83cffe47a17a..b2bea86f55243 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1388,13 +1388,13 @@ impl SqlToRel<'_, S> { .map(|f| f.name().to_owned()) .collect(); - Ok(LogicalPlan::Copy(CopyTo { - input: Arc::new(input), - output_url: statement.target, - file_type, + Ok(LogicalPlan::Copy(CopyTo::new( + Arc::new(input), + statement.target, partition_by, - options: options_map, - })) + file_type, + options_map, + ))) } fn build_order_by( diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 729c6f235b1e7..4ddd5ccccbbd7 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -322,16 +322,15 @@ impl Unparser<'_> { Some(filter) => Some(Box::new(self.expr_to_sql_inner(filter)?)), None => None, }; - let 
within_group = if agg.func.is_ordered_set_aggregate() { - order_by - .as_ref() - .unwrap_or(&Vec::new()) - .iter() - .map(|sort_expr| self.sort_to_sql(sort_expr)) - .collect::>>()? - } else { - Vec::new() - }; + let within_group: Vec = + if agg.func.is_ordered_set_aggregate() { + order_by + .iter() + .map(|sort_expr| self.sort_to_sql(sort_expr)) + .collect::>>()? + } else { + Vec::new() + }; Ok(ast::Expr::Function(Function { name: ObjectName::from(vec![Ident { value: func_name.to_string(), diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index c82239d9b455e..dd5ec4a201189 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -16,6 +16,7 @@ // under the License. use std::any::Any; +use std::hash::Hash; #[cfg(test)] use std::sync::Arc; use std::vec; @@ -24,9 +25,9 @@ use arrow::datatypes::{TimeUnit::Nanosecond, *}; use common::MockContextProvider; use datafusion_common::{assert_contains, DataFusionError, Result}; use datafusion_expr::{ - col, logical_plan::LogicalPlan, test::function_stub::sum_udaf, ColumnarValue, - CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - Volatility, + col, logical_plan::LogicalPlan, test::function_stub::sum_udaf, udf_equals_hash, + ColumnarValue, CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF, + ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -3311,7 +3312,7 @@ fn make_udf(name: &'static str, args: Vec, return_type: DataType) -> S } /// Mocked UDF -#[derive(Debug)] +#[derive(Debug, PartialEq, Hash)] struct DummyUDF { name: &'static str, signature: Signature, @@ -3348,6 +3349,8 @@ impl ScalarUDFImpl for DummyUDF { fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { panic!("dummy - not implemented") } + + udf_equals_hash!(ScalarUDFImpl); } fn parse_decimals_parser_options() -> ParserOptions { @@ -4484,7 +4487,7 @@ fn 
assert_field_not_found(mut err: DataFusionError, name: &str) { } }; match err { - DataFusionError::SchemaError { .. } => { + DataFusionError::SchemaError(_, _) => { let msg = format!("{err}"); let expected = format!("Schema error: No field named {name}."); if !msg.starts_with(&expected) { diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 54c53f7375c4f..950bf5fabd70e 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -42,7 +42,7 @@ async-trait = { workspace = true } bigdecimal = { workspace = true } bytes = { workspace = true, optional = true } chrono = { workspace = true, optional = true } -clap = { version = "4.5.40", features = ["derive", "env"] } +clap = { version = "4.5.41", features = ["derive", "env"] } datafusion = { workspace = true, default-features = true, features = ["avro"] } datafusion-spark = { workspace = true, default-features = true } datafusion-substrait = { workspace = true, default-features = true } @@ -60,8 +60,8 @@ rust_decimal = { version = "1.37.2", features = ["tokio-pg"] } sqllogictest = "0.28.3" sqlparser = { workspace = true } tempfile = { workspace = true } -testcontainers = { version = "0.24", features = ["default"], optional = true } -testcontainers-modules = { version = "0.12", features = ["postgres"], optional = true } +testcontainers = { workspace = true, optional = true } +testcontainers-modules = { workspace = true, features = ["postgres"], optional = true } thiserror = "2.0.12" tokio = { workspace = true } tokio-postgres = { version = "0.7.12", optional = true } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index d5fce1a7cdb24..a4b85bdd1d70f 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -194,7 +194,7 @@ async fn run_tests() -> Result<()> { .join() }) // run up to num_cpus streams in parallel - 
.buffer_unordered(get_available_parallelism()) + .buffer_unordered(options.test_threads) .flat_map(|result| { // Filter out any Ok() leaving only the DataFusionErrors futures::stream::iter(match result { @@ -689,6 +689,13 @@ struct Options { help = "IGNORED (for compatibility with built-in rust test runner)" )] nocapture: bool, + + #[clap( + long, + help = "Number of threads used for running tests in parallel", + default_value_t = get_available_parallelism() + )] + test_threads: usize, } impl Options { diff --git a/datafusion/sqllogictest/src/engines/postgres_engine/mod.rs b/datafusion/sqllogictest/src/engines/postgres_engine/mod.rs index 68816626bf672..375f06d34b44f 100644 --- a/datafusion/sqllogictest/src/engines/postgres_engine/mod.rs +++ b/datafusion/sqllogictest/src/engines/postgres_engine/mod.rs @@ -93,7 +93,7 @@ impl Postgres { let spawned_task = SpawnedTask::spawn(async move { if let Err(e) = connection.await { - log::error!("Postgres connection error: {:?}", e); + log::error!("Postgres connection error: {e:?}"); } }); diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 3f064485e51aa..bdf327c98248a 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -2548,7 +2548,117 @@ select covar_samp(c1, c2), arrow_typeof(covar_samp(c1, c2)) from t; statement ok drop table t; +# correlation_f64_1 +statement ok +create table t (c1 double, c2 double) as values (1, 4), (2, 5), (3, 6); + +query RT rowsort +select corr(c1, c2), arrow_typeof(corr(c1, c2)) from t; +---- +1 Float64 + +# correlation with different numeric types (create test data) +statement ok +CREATE OR REPLACE TABLE corr_test( + int8_col TINYINT, + int16_col SMALLINT, + int32_col INT, + int64_col BIGINT, + uint32_col INT UNSIGNED, + float32_col FLOAT, + float64_col DOUBLE +) as VALUES +(1, 10, 100, 1000, 10000, 1.1, 10.1), +(2, 20, 200, 2000, 20000, 2.2, 20.2), +(3, 30, 300, 
3000, 30000, 3.3, 30.3), +(4, 40, 400, 4000, 40000, 4.4, 40.4), +(5, 50, 500, 5000, 50000, 5.5, 50.5); + +# correlation using int32 and float64 +query R +SELECT corr(int32_col, float64_col) FROM corr_test; +---- +1 + +# correlation using int64 and int32 +query R +SELECT corr(int64_col, int32_col) FROM corr_test; +---- +1 + +# correlation using float32 and int8 +query R +SELECT corr(float32_col, int8_col) FROM corr_test; +---- +1 + +# correlation using uint32 and int16 +query R +SELECT corr(uint32_col, int16_col) FROM corr_test; +---- +1 + +# correlation with nulls +statement ok +CREATE OR REPLACE TABLE corr_nulls( + x INT, + y DOUBLE +) as VALUES +(1, 10.0), +(2, 20.0), +(NULL, 30.0), +(4, NULL), +(5, 50.0); + +# correlation with some nulls (should skip null pairs) +query R +SELECT corr(x, y) FROM corr_nulls; +---- +1 + +# correlation with single row (currently yields 0, not NULL) +statement ok +CREATE OR REPLACE TABLE corr_single_row( + x INT, + y DOUBLE +) as VALUES +(1, 10.0); + +query R +SELECT corr(x, y) FROM corr_single_row; +---- +0 + +# correlation with all nulls +statement ok +CREATE OR REPLACE TABLE corr_all_nulls( + x INT, + y DOUBLE +) as VALUES +(NULL, NULL), +(NULL, NULL); + +query R +SELECT corr(x, y) FROM corr_all_nulls; +---- +NULL + +statement ok +drop table corr_test; + +statement ok +drop table corr_nulls; + +statement ok +drop table corr_single_row; + +statement ok +drop table corr_all_nulls; + # covariance_f64_4 +statement ok +drop table if exists t; + statement ok create table t (c1 double, c2 double) as values (1.1, 4.1), (2.0, 5.0), (3.0, 6.0); @@ -4241,6 +4351,50 @@ DROP VIEW binary_views statement ok DROP TABLE strings; +############ FixedSizeBinary ############ + +statement ok +CREATE TABLE binaries +AS VALUES + (X'000103', 1), + (X'000104', 1), + (X'000101', 3), + (X'000103', 1), + (X'000102', 1), + (NULL, 1), + (NULL, 4), + (X'000104', 1), + (X'000109', 2), + (X'000103', 1), + (X'000101', 2); + +statement ok +CREATE VIEW fixed_size_binary_views
+AS SELECT arrow_cast(column1, 'FixedSizeBinary(3)') as value, column2 as id FROM binaries; + +query I? +SELECT id, MIN(value) FROM fixed_size_binary_views GROUP BY id ORDER BY id; +---- +1 000102 +2 000101 +3 000101 +4 NULL + +query I? +SELECT id, MAX(value) FROM fixed_size_binary_views GROUP BY id ORDER BY id; +---- +1 000104 +2 000109 +3 000101 +4 NULL + +statement ok +DROP VIEW fixed_size_binary_views; + +statement ok +DROP TABLE binaries; + + ################# # End min_max on strings/binary with null values and groups ################# diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a3d9c3e1d9c1f..8cbc4272501a2 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -8065,6 +8065,13 @@ FixedSizeList(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0 statement error create table varying_fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5]); +# https://github.com/apache/datafusion/issues/16187 +# should be NULL in case of out of bounds for Null Type +query ? +select [named_struct('a', 1, 'b', null)][-2]; +---- +NULL + ### Delete tables statement ok diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index 148f0dfe64bb7..2e91a0363db06 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -108,6 +108,17 @@ SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs +# NULL with DATE arithmetic should yield NULL +query ? +SELECT NULL - DATE '1984-02-28'; +---- +NULL + +query ? 
+SELECT DATE '1984-02-28' - NULL +---- +NULL + # to_date_test statement ok create table to_date_t1(ts bigint) as VALUES diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index 089910785ad9d..2c91e820411ea 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -747,3 +747,31 @@ SELECT cast(cast('5.20' as decimal(4,2)) as decimal(3,2)) ---- 0 5.2 + +query RR +SELECT + arrow_cast(1.23,'Decimal128(3,2)') - arrow_cast(123, 'UInt64') as subtration_uint, + arrow_cast(1.23,'Decimal128(3,2)') - arrow_cast(123, 'Int64') as subtration_int +---- +-121.77 -121.77 + +query RR +SELECT + arrow_cast(1.23,'Decimal128(3,2)') + arrow_cast(123, 'UInt64') as addition_uint, + arrow_cast(1.23,'Decimal128(3,2)') + arrow_cast(123, 'Int64') as addition_int +---- +124.23 124.23 + +query RR +SELECT + arrow_cast(1.23,'Decimal128(3,2)') * arrow_cast(123, 'UInt64') as mulitplication_uint, + arrow_cast(1.23,'Decimal128(3,2)') * arrow_cast(123, 'Int64') as multiplication_int +---- +151.29 151.29 + +query RR +SELECT + arrow_cast(1.23,'Decimal128(3,2)') / arrow_cast(123, 'UInt64') as divison_uint, + arrow_cast(1.23,'Decimal128(3,2)') / arrow_cast(123, 'Int64') as divison_int +---- +0.01 0.01 diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 50575a3aba4dd..4d61b254f5077 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -226,7 +226,7 @@ initial_physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/dat initial_physical_plan_with_stats DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], file_type=csv, has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] initial_physical_plan_with_schema DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], file_type=csv, has_header=true, schema=[a:Int32;N, b:Int32;N, c:Int32;N] physical_plan after OutputRequirements -01)OutputRequirementExec +01)OutputRequirementExec: order_by=[], dist_by=Unspecified 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], file_type=csv, has_header=true physical_plan after aggregate_statistics SAME TEXT AS ABOVE physical_plan after join_selection SAME TEXT AS ABOVE @@ -303,7 +303,7 @@ initial_physical_plan_with_schema 01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements -01)OutputRequirementExec, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +01)OutputRequirementExec: order_by=[], dist_by=Unspecified, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Exact(671), 
[(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, statistics=[Rows=Exact(8), Bytes=Exact(671), [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] physical_plan after aggregate_statistics SAME TEXT AS ABOVE @@ -347,7 +347,7 @@ initial_physical_plan_with_schema 01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements -01)OutputRequirementExec +01)OutputRequirementExec: order_by=[], dist_by=Unspecified 02)--GlobalLimitExec: skip=0, fetch=10 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, file_type=parquet physical_plan after aggregate_statistics 
SAME TEXT AS ABOVE diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index f4188f4cb395b..f57c505068939 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1981,3 +1981,234 @@ physical_plan 06)┌─────────────┴─────────────┐ 07)│ PlaceholderRowExec │ 08)└───────────────────────────┘ + + +# Test explain for large plans + +statement ok +CREATE TABLE t (k int) + +# By default, the plan of this large query is cropped +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ +06)│ │ +07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +08)│ │ │ +09)│ │ │ +10)└─────────────┬─────────────┘ │ +11)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ │ DataSourceExec │ +13)│ │ │ -------------------- │ +14)│ ├────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +15)│ │ │ │ format: memory │ +16)│ │ │ │ rows: 0 │ +17)└─────────────┬─────────────┘ │ └───────────────────────────┘ +18)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ │ DataSourceExec │ +20)│ │ │ -------------------- │ +21)│ 
├───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +22)│ │ │ │ format: memory │ +23)│ │ │ │ rows: 0 │ +24)└─────────────┬─────────────┘ │ └───────────────────────────┘ +25)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ │ DataSourceExec │ +27)│ │ │ -------------------- │ +28)│ ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +29)│ │ │ │ format: memory │ +30)│ │ │ │ rows: 0 │ +31)└─────────────┬─────────────┘ │ └───────────────────────────┘ +32)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ │ DataSourceExec │ +34)│ │ │ -------------------- │ +35)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +36)│ │ │ │ format: memory │ +37)│ │ │ │ rows: 0 │ +38)└─────────────┬─────────────┘ │ └───────────────────────────┘ +39)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ │ DataSourceExec │ +41)│ │ │ -------------------- │ +42)│ ├────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +43)│ │ │ │ format: memory │ +44)│ │ │ │ rows: 0 │ +45)└─────────────┬─────────────┘ │ └───────────────────────────┘ +46)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ │ DataSourceExec │ +48)│ │ │ -------------------- │ +49)│ ├───────────────────────────────────────────┐ │ bytes: 0 │ +50)│ │ │ │ format: memory │ +51)│ │ │ │ rows: 0 │ +52)└─────────────┬─────────────┘ │ └───────────────────────────┘ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ 
+59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +# Setting the tree_maximum_render_size to 0 will allow the entire plan to be rendered +statement ok +SET datafusion.explain.tree_maximum_render_width = 0 + +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +03)└─────────────┬─────────────┘ │ +04)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ │ DataSourceExec │ +06)│ │ │ -------------------- │ +07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +08)│ │ │ │ format: memory │ +09)│ │ │ │ rows: 0 │ +10)└─────────────┬─────────────┘ │ └───────────────────────────┘ +11)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ │ DataSourceExec │ +13)│ │ │ -------------------- │ +14)│ ├────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +15)│ │ │ │ format: memory │ +16)│ │ │ │ rows: 0 │ +17)└─────────────┬─────────────┘ │ └───────────────────────────┘ +18)┌─────────────┴─────────────┐ 
┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ │ DataSourceExec │ +20)│ │ │ -------------------- │ +21)│ ├───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +22)│ │ │ │ format: memory │ +23)│ │ │ │ rows: 0 │ +24)└─────────────┬─────────────┘ │ └───────────────────────────┘ +25)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ │ DataSourceExec │ +27)│ │ │ -------------------- │ +28)│ ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +29)│ │ │ │ format: memory │ +30)│ │ │ │ rows: 0 │ +31)└─────────────┬─────────────┘ │ └───────────────────────────┘ +32)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ │ DataSourceExec │ +34)│ │ │ -------------------- │ +35)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +36)│ │ │ │ format: memory │ +37)│ │ │ │ rows: 0 │ +38)└─────────────┬─────────────┘ │ └───────────────────────────┘ +39)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ │ DataSourceExec │ +41)│ │ │ -------------------- │ +42)│ ├────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +43)│ │ │ │ format: memory │ +44)│ │ │ │ rows: 0 │ +45)└─────────────┬─────────────┘ │ └───────────────────────────┘ +46)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ │ DataSourceExec │ +48)│ │ │ -------------------- │ +49)│ ├───────────────────────────────────────────┐ │ bytes: 0 │ +50)│ │ │ │ format: memory │ +51)│ │ │ │ rows: 0 │ +52)└─────────────┬─────────────┘ │ └───────────────────────────┘ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ 
├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +# Setting the tree_maximum_render_size to a smaller size +statement ok +SET datafusion.explain.tree_maximum_render_width = 60 + +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├────────────────────────────────────────────────────────── +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ +06)│ │ +07)│ ├────────────────────────────────────────────────────────── +08)│ │ +09)│ │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ +13)│ │ +14)│ ├────────────────────────────────────────────────────────── +15)│ │ +16)│ │ +17)└─────────────┬─────────────┘ +18)┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ +20)│ │ +21)│ ├────────────────────────────────────────────────────────── +22)│ │ +23)│ │ +24)└─────────────┬─────────────┘ +25)┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ +27)│ │ +28)│ ├────────────────────────────────────────────────────────── +29)│ │ +30)│ │ +31)└─────────────┬─────────────┘ +32)┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ +34)│ │ +35)│ ├────────────────────────────────────────────────────────── +36)│ │ +37)│ │ +38)└─────────────┬─────────────┘ +39)┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ +41)│ │ +42)│ ├────────────────────────────────────────────────────────── +43)│ │ +44)│ │ +45)└─────────────┬─────────────┘ +46)┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ +48)│ 
│ +49)│ ├───────────────────────────────────────────┐ +50)│ │ │ +51)│ │ │ +52)└─────────────┬─────────────┘ │ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +statement ok +DROP TABLE t diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 9e67018ecd0b9..6dc4bd8404833 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -3425,7 +3425,7 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([sn@0, amount@1], 8), input_partitions=8 07)------------AggregateExec: mode=Partial, gby=[sn@0 as sn, amount@1 as amount], aggr=[] 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -09)----------------DataSourceExec: partitions=1, partition_sizes=[1] +09)----------------DataSourceExec: partitions=1, partition_sizes=[2] query IRI SELECT s.sn, s.amount, 2*s.sn @@ -3494,9 +3494,9 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([sn@0, amount@1], 8), input_partitions=8 07)------------AggregateExec: mode=Partial, gby=[sn@1 as sn, amount@2 as amount], aggr=[sum(l.amount)] 08)--------------NestedLoopJoinExec: join_type=Inner, filter=sn@0 >= sn@1, projection=[amount@1, sn@2, amount@3] -09)----------------DataSourceExec: partitions=1, partition_sizes=[1] +09)----------------DataSourceExec: partitions=1, partition_sizes=[2] 
10)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 -11)------------------DataSourceExec: partitions=1, partition_sizes=[1] +11)------------------DataSourceExec: partitions=1, partition_sizes=[2] query IRR SELECT r.sn, SUM(l.amount), r.amount @@ -3642,8 +3642,8 @@ physical_plan 07)------------AggregateExec: mode=Partial, gby=[sn@2 as sn, zip_code@0 as zip_code, country@1 as country, ts@3 as ts, currency@4 as currency, amount@5 as amount, sum_amount@6 as sum_amount], aggr=[] 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 09)----------------ProjectionExec: expr=[zip_code@0 as zip_code, country@1 as country, sn@2 as sn, ts@3 as ts, currency@4 as currency, amount@5 as amount, sum(l.amount) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@6 as sum_amount] -10)------------------BoundedWindowAggExec: wdw=[sum(l.amount) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(l.amount) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -11)--------------------DataSourceExec: partitions=1, partition_sizes=[1] +10)------------------BoundedWindowAggExec: wdw=[sum(l.amount) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(l.amount) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] +11)--------------------DataSourceExec: partitions=1, partition_sizes=[2] query ITIPTRR @@ -3939,7 +3939,7 @@ physical_plan 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4] 05)--------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], file_type=csv, has_header=true 06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] -07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 08)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], file_type=csv, has_header=true # reset partition number to 8. 
@@ -4535,19 +4535,20 @@ LIMIT 5 query ITIPTR rowsort SELECT r.* FROM sales_global_with_pk as l, sales_global_with_pk as r +ORDER BY 1, 2, 3, 4, 5, 6 LIMIT 5 ---- 0 GRC 0 2022-01-01T06:00:00 EUR 30 -1 FRA 1 2022-01-01T08:00:00 EUR 50 -1 FRA 3 2022-01-02T12:00:00 EUR 200 -1 TUR 2 2022-01-01T11:30:00 TRY 75 -1 TUR 4 2022-01-03T10:00:00 TRY 100 +0 GRC 0 2022-01-01T06:00:00 EUR 30 +0 GRC 0 2022-01-01T06:00:00 EUR 30 +0 GRC 0 2022-01-01T06:00:00 EUR 30 +0 GRC 0 2022-01-01T06:00:00 EUR 30 # Create a table with timestamp data statement ok CREATE TABLE src_table ( - t1 TIMESTAMP, - c2 INT + t1 TIMESTAMP, + c2 INT ) AS VALUES ('2020-12-10T00:00:00.00Z', 0), ('2020-12-11T00:00:00.00Z', 1), @@ -4592,8 +4593,8 @@ STORED AS CSV; # Create a table from the generated CSV files: statement ok CREATE EXTERNAL TABLE timestamp_table ( - t1 TIMESTAMP, - c2 INT, + t1 TIMESTAMP, + c2 INT, ) STORED AS CSV LOCATION 'test_files/scratch/group_by/timestamp_table' @@ -5176,8 +5177,8 @@ physical_plan 02)--AggregateExec: mode=Single, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }, ts@0, 946684800000000000) as date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"),keywords_stream.ts,Utf8("2000-01-01"))], aggr=[count(keywords_stream.keyword)] 03)----CoalesceBatchesExec: target_batch_size=2 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(keyword@0, keyword@1)] -05)--------DataSourceExec: partitions=1, partition_sizes=[1] -06)--------DataSourceExec: partitions=1, partition_sizes=[1] +05)--------DataSourceExec: partitions=1, partition_sizes=[3] +06)--------DataSourceExec: partitions=1, partition_sizes=[3] query PI SELECT @@ -5219,17 +5220,17 @@ statement ok create table t(a int, b bytea) as values (1, 0xa), (1, 0xa), (2, null), (null, 0xb), (null, 0xb); query I?I -select a, b, count(*) from t group by grouping sets ((a, b), (a), (b)); +select a, b, count(*) from t group by grouping sets ((a, b), 
(a), (b)) order by a, b; ---- 1 0a 2 -2 NULL 1 -NULL 0b 2 1 NULL 2 2 NULL 1 -NULL NULL 2 +2 NULL 1 NULL 0a 2 -NULL NULL 1 NULL 0b 2 +NULL 0b 2 +NULL NULL 2 +NULL NULL 1 statement ok drop table t; @@ -5239,13 +5240,13 @@ statement ok create table t(a int, b bytea) as values (1, 0xa), (1, 0xa), (2, 0xb), (3, 0xb), (3, 0xb); query I?I -select a, b, count(*) from t group by grouping sets ((a, b), (a), (b)); +select a, b, count(*) from t group by grouping sets ((a, b), (a), (b)) order by a, b; ---- 1 0a 2 -2 0b 1 -3 0b 2 1 NULL 2 +2 0b 1 2 NULL 1 +3 0b 2 3 NULL 2 NULL 0a 2 NULL 0b 3 diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index f76e436e0ad3b..86dfbd7c84963 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -274,6 +274,7 @@ datafusion.explain.physical_plan_only false datafusion.explain.show_schema false datafusion.explain.show_sizes true datafusion.explain.show_statistics false +datafusion.explain.tree_maximum_render_width 240 datafusion.format.date_format %Y-%m-%d datafusion.format.datetime_format %Y-%m-%dT%H:%M:%S%.f datafusion.format.duration_format pretty @@ -386,6 +387,7 @@ datafusion.explain.physical_plan_only false When set to true, the explain statem datafusion.explain.show_schema false When set to true, the explain statement will print schema information datafusion.explain.show_sizes true When set to true, the explain statement will print the partition sizes datafusion.explain.show_statistics false When set to true, the explain statement will print operator statistics for physical plans +datafusion.explain.tree_maximum_render_width 240 (format=tree only) Maximum total width of the rendered tree. When set to 0, the tree will have no width limit. 
datafusion.format.date_format %Y-%m-%d Date format for date arrays datafusion.format.datetime_format %Y-%m-%dT%H:%M:%S%.f Format for DateTime arrays datafusion.format.duration_format pretty Duration format. Can be either `"pretty"` or `"ISO8601"` diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 8a9c01d36308d..9a3c959884aa0 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -68,7 +68,7 @@ physical_plan 02)--ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -128,7 +128,7 @@ physical_plan 01)DataSinkExec: sink=MemoryTable (partitions=1) 02)--CoalescePartitionsExec 
03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY 
[aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -179,7 +179,7 @@ physical_plan 02)--ProjectionExec: expr=[a1@0 as a1, a2@1 as a2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS 
BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index 24982dfc28a75..b6e35f4081398 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -359,7 +359,7 @@ physical_plan 02)--ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, count(*) 
PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: 
wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 @@ -420,7 +420,7 @@ physical_plan 01)DataSinkExec: sink=ParquetSink(file_groups=[]) 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: 
Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 diff --git a/datafusion/sqllogictest/test_files/join.slt.part 
b/datafusion/sqllogictest/test_files/join.slt.part index 19763ab0083f8..203dc328e71eb 100644 --- a/datafusion/sqllogictest/test_files/join.slt.part +++ b/datafusion/sqllogictest/test_files/join.slt.part @@ -853,47 +853,47 @@ physical_plan 03)----DataSourceExec: partitions=1, partition_sizes=[1] 04)----DataSourceExec: partitions=1, partition_sizes=[1] -query ITT +query ITT rowsort SELECT e.emp_id, e.name, d.dept_name FROM employees AS e LEFT JOIN department AS d ON (e.name = 'Alice' OR e.name = 'Bob'); ---- -1 Alice HR 1 Alice Engineering +1 Alice HR 1 Alice Sales -2 Bob HR 2 Bob Engineering +2 Bob HR 2 Bob Sales 3 Carol NULL # neither RIGHT OUTER JOIN -query ITT +query ITT rowsort SELECT e.emp_id, e.name, d.dept_name FROM department AS d RIGHT JOIN employees AS e ON (e.name = 'Alice' OR e.name = 'Bob'); ---- -1 Alice HR 1 Alice Engineering +1 Alice HR 1 Alice Sales -2 Bob HR 2 Bob Engineering +2 Bob HR 2 Bob Sales 3 Carol NULL # neither FULL OUTER JOIN -query ITT +query ITT rowsort SELECT e.emp_id, e.name, d.dept_name FROM department AS d FULL JOIN employees AS e ON (e.name = 'Alice' OR e.name = 'Bob'); ---- -1 Alice HR 1 Alice Engineering +1 Alice HR 1 Alice Sales -2 Bob HR 2 Bob Engineering +2 Bob HR 2 Bob Sales 3 Carol NULL diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 3be5c1b1c370e..5d68ed35b2a98 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -24,7 +24,7 @@ statement ok set datafusion.execution.target_partitions = 2; statement ok -set datafusion.execution.batch_size = 2; +set datafusion.execution.batch_size = 8192; statement ok set datafusion.explain.logical_plan_only = true; @@ -549,64 +549,64 @@ statement ok set datafusion.optimizer.repartition_joins = true query ITT nosort -SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 
a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITT nosort -SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITT nosort -SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITITI rowsort SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2 @@ -685,64 +685,64 @@ statement ok set datafusion.optimizer.repartition_joins = false query ITT nosort -SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITT nosort -SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITT 
nosort -SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name, t2_name ---- -11 a z -11 a y -11 a x 11 a w -22 b z -22 b y -22 b x +11 a x +11 a y +11 a z 22 b w -33 c z -33 c y -33 c x +22 b x +22 b y +22 b z 33 c w -44 d z -44 d y -44 d x +33 c x +33 c y +33 c z 44 d w +44 d x +44 d y +44 d z query ITITI rowsort SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2 @@ -2066,6 +2066,7 @@ SELECT join_t1.t1_id, join_t2.t2_id FROM join_t1 INNER JOIN join_t2 ON join_t1.t1_id > join_t2.t2_id WHERE join_t1.t1_id > 10 AND join_t2.t2_int > 1 +ORDER BY 1 ---- 22 11 33 11 @@ -2105,6 +2106,7 @@ SELECT join_t1.t1_id, join_t2.t2_id FROM (select t1_id from join_t1 where join_t1.t1_id > 22) as join_t1 RIGHT JOIN (select t2_id from join_t2 where join_t2.t2_id > 11) as join_t2 ON join_t1.t1_id < join_t2.t2_id +ORDER BY 1, 2 ---- 33 44 33 55 @@ -2151,6 +2153,7 @@ WHERE EXISTS ( FROM join_t2 WHERE join_t1.t1_id + 1 > join_t2.t2_id * 2 ) +ORDER BY 1 ---- 22 b 2 33 c 3 @@ -2167,6 +2170,7 @@ WHERE EXISTS ( FROM join_t2 WHERE join_t1.t1_id + 1 > join_t2.t2_id * 2 ) +ORDER BY 1 ---- 22 b 2 33 c 3 @@ -3195,7 +3199,7 @@ physical_plan 04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true 09)----CoalesceBatchesExec: target_batch_size=2 10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST @@ -3233,7 +3237,7 @@ physical_plan 08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 12)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true statement ok @@ -3272,14 +3276,14 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 08)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -09)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +09)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true 11)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true] 
12)--------CoalesceBatchesExec: target_batch_size=2 13)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 14)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 15)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -16)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +16)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 17)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true statement ok @@ -3314,7 +3318,7 @@ physical_plan 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@1, a@1)] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true 04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -05)------BoundedWindowAggExec: 
wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 06)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true # hash join should propagate ordering equivalence of the right side for RIGHT ANTI join. 
@@ -3341,7 +3345,7 @@ physical_plan 02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(a@0, a@1)] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC], file_type=csv, has_header=true 04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] -05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 06)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true query TT @@ -3416,7 +3420,7 @@ physical_plan 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], file_type=csv, has_header=true 06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] -07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 08)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], file_type=csv, has_header=true # run query above in multiple partitions @@ -3871,8 +3875,8 @@ physical_plan 02)--CoalesceBatchesExec: target_batch_size=3 03)----HashJoinExec: mode=CollectLeft, join_type=Left, on=[(b@1, b@1)] 04)------SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] -05)--------DataSourceExec: partitions=1, partition_sizes=[1] -06)------DataSourceExec: partitions=1, partition_sizes=[1] +05)--------DataSourceExec: partitions=1, partition_sizes=[2] +06)------DataSourceExec: partitions=1, partition_sizes=[2] @@ -3928,8 +3932,8 @@ physical_plan 01)ProjectionExec: expr=[a@2 as a, b@3 as b, a@0 as a, b@1 as b] 02)--CoalesceBatchesExec: target_batch_size=3 03)----HashJoinExec: mode=CollectLeft, join_type=Left, on=[(b@1, b@1)] -04)------DataSourceExec: partitions=1, partition_sizes=[1] -05)------DataSourceExec: partitions=1, partition_sizes=[1] 
+04)------DataSourceExec: partitions=1, partition_sizes=[2] +05)------DataSourceExec: partitions=1, partition_sizes=[2] # Null build indices: @@ -3989,8 +3993,8 @@ physical_plan 02)--CoalesceBatchesExec: target_batch_size=3 03)----HashJoinExec: mode=CollectLeft, join_type=Left, on=[(b@1, b@1)] 04)------SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false] -05)--------DataSourceExec: partitions=1, partition_sizes=[1] -06)------DataSourceExec: partitions=1, partition_sizes=[1] +05)--------DataSourceExec: partitions=1, partition_sizes=[2] +06)------DataSourceExec: partitions=1, partition_sizes=[2] # Test CROSS JOIN LATERAL syntax (planning) @@ -4160,23 +4164,43 @@ AS VALUES (3, 3, true), (3, 3, false); -query IIIIB -SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 LIMIT 2; +query IIIIB rowsort +-- Note: using LIMIT value higher than cardinality before LIMIT to avoid query non-determinism +SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 LIMIT 20; ---- -2 2 2 2 true +1 1 NULL NULL NULL 2 2 2 2 false - -query IIIIB -SELECT * FROM t0 FULL JOIN t1 ON t0.c2 >= t1.c2 LIMIT 2; ----- 2 2 2 2 true -3 3 2 2 true +3 3 3 3 false +3 3 3 3 true +4 4 NULL NULL NULL -query IIIIB -SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 AND t0.c2 >= t1.c2 LIMIT 2; +query IIIIB rowsort +-- Note: using LIMIT value higher than cardinality before LIMIT to avoid query non-determinism +SELECT * FROM t0 FULL JOIN t1 ON t0.c2 >= t1.c2 LIMIT 20; ---- +1 1 NULL NULL NULL +2 2 2 2 false 2 2 2 2 true +3 3 2 2 false +3 3 2 2 true +3 3 3 3 false +3 3 3 3 true +4 4 2 2 false +4 4 2 2 true +4 4 3 3 false +4 4 3 3 true + +query IIIIB rowsort +-- Note: using LIMIT value higher than cardinality before LIMIT to avoid query non-determinism +SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 AND t0.c2 >= t1.c2 LIMIT 20; +---- +1 1 NULL NULL NULL 2 2 2 2 false +2 2 2 2 true +3 3 3 3 false +3 3 3 3 true +4 4 NULL NULL NULL ## Test !join.on.is_empty() && join.filter.is_none() query TT @@ -4190,8 
+4214,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=3, fetch=2 02)--HashJoinExec: mode=CollectLeft, join_type=Full, on=[(c1@0, c1@0)] -03)----DataSourceExec: partitions=1, partition_sizes=[1] -04)----DataSourceExec: partitions=1, partition_sizes=[1] +03)----DataSourceExec: partitions=1, partition_sizes=[2] +04)----DataSourceExec: partitions=1, partition_sizes=[2] ## Test join.on.is_empty() && join.filter.is_some() query TT @@ -4205,8 +4229,8 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=0, fetch=2 02)--NestedLoopJoinExec: join_type=Full, filter=c2@0 >= c2@1 -03)----DataSourceExec: partitions=1, partition_sizes=[1] -04)----DataSourceExec: partitions=1, partition_sizes=[1] +03)----DataSourceExec: partitions=1, partition_sizes=[2] +04)----DataSourceExec: partitions=1, partition_sizes=[2] ## Test !join.on.is_empty() && join.filter.is_some() query TT @@ -4220,8 +4244,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=3, fetch=2 02)--HashJoinExec: mode=CollectLeft, join_type=Full, on=[(c1@0, c1@0)], filter=c2@0 >= c2@1 -03)----DataSourceExec: partitions=1, partition_sizes=[1] -04)----DataSourceExec: partitions=1, partition_sizes=[1] +03)----DataSourceExec: partitions=1, partition_sizes=[2] +04)----DataSourceExec: partitions=1, partition_sizes=[2] ## Add more test cases for join limit pushdown statement ok @@ -4236,23 +4260,23 @@ set datafusion.execution.target_partitions = 1; # Note we use csv as MemoryExec does not support limit push down (so doesn't manifest # bugs if limits are improperly pushed down) query I -COPY (values (1), (2), (3), (4), (5)) TO 'test_files/scratch/limit/t1.csv' +COPY (values (1), (2), (3), (4), (5)) TO 'test_files/scratch/joins/t1.csv' STORED AS CSV ---- 5 # store t2 in different order so the top N rows are not the same as the top N rows of t1 query I -COPY (values (5), (4), (3), (2), (1)) TO 'test_files/scratch/limit/t2.csv' +COPY (values (5), (4), (3), (2), (1)) TO 
'test_files/scratch/joins/t2.csv' STORED AS CSV ---- 5 statement ok -create external table t1(a int) stored as CSV location 'test_files/scratch/limit/t1.csv'; +create external table t1(a int) stored as CSV location 'test_files/scratch/joins/t1.csv'; statement ok -create external table t2(b int) stored as CSV location 'test_files/scratch/limit/t2.csv'; +create external table t2(b int) stored as CSV location 'test_files/scratch/joins/t2.csv'; ###### ## LEFT JOIN w/ LIMIT @@ -4284,8 +4308,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=3, fetch=2 02)--HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, b@0)] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t1.csv]]}, projection=[a], limit=2, file_type=csv, has_header=true -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t2.csv]]}, projection=[b], file_type=csv, has_header=true +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t1.csv]]}, projection=[a], limit=2, file_type=csv, has_header=true +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t2.csv]]}, projection=[b], file_type=csv, has_header=true ###### ## RIGHT JOIN w/ LIMIT @@ -4318,8 +4342,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=3, fetch=2 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(a@0, b@0)] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t1.csv]]}, projection=[a], file_type=csv, has_header=true -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t2.csv]]}, projection=[b], limit=2, file_type=csv, has_header=true +03)----DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t1.csv]]}, projection=[a], file_type=csv, has_header=true +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t2.csv]]}, projection=[b], limit=2, file_type=csv, has_header=true ###### ## FULL JOIN w/ LIMIT @@ -4355,8 +4379,8 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=3, fetch=2 02)--HashJoinExec: mode=CollectLeft, join_type=Full, on=[(a@0, b@0)] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t1.csv]]}, projection=[a], file_type=csv, has_header=true -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/t2.csv]]}, projection=[b], file_type=csv, has_header=true +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t1.csv]]}, projection=[a], file_type=csv, has_header=true +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/joins/t2.csv]]}, projection=[b], file_type=csv, has_header=true statement ok drop table t1; @@ -4429,11 +4453,9 @@ physical_plan 04)------CoalescePartitionsExec 05)--------CoalesceBatchesExec: target_batch_size=3 06)----------FilterExec: b@1 > 3, projection=[a@0] -07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -08)--------------DataSourceExec: partitions=1, partition_sizes=[1] -09)------SortExec: expr=[c@2 DESC], preserve_partitioning=[true] -10)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -11)----------DataSourceExec: partitions=1, partition_sizes=[1] +07)------------DataSourceExec: partitions=2, partition_sizes=[1, 1] +08)------SortExec: expr=[c@2 DESC], preserve_partitioning=[true] +09)--------DataSourceExec: partitions=2, partition_sizes=[1, 1] query TT explain select * from test where a in 
(select a from test where b > 3) order by c desc nulls last; @@ -4453,11 +4475,9 @@ physical_plan 04)------CoalescePartitionsExec 05)--------CoalesceBatchesExec: target_batch_size=3 06)----------FilterExec: b@1 > 3, projection=[a@0] -07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -08)--------------DataSourceExec: partitions=1, partition_sizes=[1] -09)------SortExec: expr=[c@2 DESC NULLS LAST], preserve_partitioning=[true] -10)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -11)----------DataSourceExec: partitions=1, partition_sizes=[1] +07)------------DataSourceExec: partitions=2, partition_sizes=[1, 1] +08)------SortExec: expr=[c@2 DESC NULLS LAST], preserve_partitioning=[true] +09)--------DataSourceExec: partitions=2, partition_sizes=[1, 1] query III select * from test where a in (select a from test where b > 3) order by c desc nulls first; @@ -4790,3 +4810,341 @@ DROP TABLE compound_field_table_t; statement ok DROP TABLE compound_field_table_u; + + +statement ok +CREATE TABLE t1 (k INT, v INT); + +statement ok +CREATE TABLE t2 (k INT, v INT); + +statement ok +INSERT INTO t1 + SELECT value AS k, value AS v + FROM range(1, 10001) AS t(value); + +statement ok +INSERT INTO t2 VALUES (1, 1); + +## The TopK(Sort with fetch) should not be pushed down to the hash join +query TT +explain +SELECT * +FROM t1 +LEFT ANTI JOIN t2 ON t1.k = t2.k +ORDER BY t1.k +LIMIT 2; +---- +logical_plan +01)Sort: t1.k ASC NULLS LAST, fetch=2 +02)--LeftAnti Join: t1.k = t2.k +03)----TableScan: t1 projection=[k, v] +04)----TableScan: t2 projection=[k] +physical_plan +01)SortExec: TopK(fetch=2), expr=[k@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--CoalesceBatchesExec: target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(k@0, k@0)] +04)------DataSourceExec: partitions=1, partition_sizes=[1] +05)------DataSourceExec: partitions=1, partition_sizes=[3334] + + +query II +SELECT * +FROM t1 
+LEFT ANTI JOIN t2 ON t1.k = t2.k +ORDER BY t1.k +LIMIT 2; +---- +2 2 +3 3 + + +## Test left anti join without limit, we should support push down sort to the left side +query TT +explain +SELECT * +FROM t1 +LEFT ANTI JOIN t2 ON t1.k = t2.k +ORDER BY t1.k; +---- +logical_plan +01)Sort: t1.k ASC NULLS LAST +02)--LeftAnti Join: t1.k = t2.k +03)----TableScan: t1 projection=[k, v] +04)----TableScan: t2 projection=[k] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(k@0, k@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[1] +04)----SortExec: expr=[k@0 ASC NULLS LAST], preserve_partitioning=[false] +05)------DataSourceExec: partitions=1, partition_sizes=[3334] + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; + + +# Test hash joins with an empty build relation (empty build relation optimization) + +statement ok +CREATE TABLE t1 (k1 int, v1 int); + +statement ok +CREATE TABLE t2 (k2 int, v2 int); + +statement ok +INSERT INTO t1 SELECT i AS k, 1 FROM generate_series(1, 30000) t(i); + +statement ok +set datafusion.explain.physical_plan_only = true; + +# INNER JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +JOIN t2 ON k1 = k2 +---- +physical_plan +01)ProjectionExec: expr=[k1@2 as k1, v1@3 as v1, k2@0 as k2, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(k2@0, k1@0)] +04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)------DataSourceExec: partitions=1, partition_sizes=[10000] + +query IIII +SELECT sum(k1), sum(v1), sum(k2), sum(v2) +FROM t1 +JOIN t2 ON k1 = k2 +---- +NULL NULL NULL NULL + +# LEFT JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +LEFT JOIN t2 ON k1 = k2 +---- +physical_plan +01)ProjectionExec: expr=[k1@2 as k1, v1@3 as v1, k2@0 as k2, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=Right, on=[(k2@0, k1@0)] 
+04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)------DataSourceExec: partitions=1, partition_sizes=[10000] + +query IIII +SELECT sum(k1), sum(v1), sum(k2), sum(v2) +FROM t1 +LEFT JOIN t2 ON k1 = k2 +---- +450015000 30000 NULL NULL + +# RIGHT JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +RIGHT JOIN t2 ON k1 = k2 +---- +physical_plan +01)ProjectionExec: expr=[k1@2 as k1, v1@3 as v1, k2@0 as k2, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=Left, on=[(k2@0, k1@0)] +04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)------DataSourceExec: partitions=1, partition_sizes=[10000] + +query IIII +SELECT sum(k1), sum(v1), sum(k2), sum(v2) +FROM t1 +RIGHT JOIN t2 ON k1 = k2 +---- +NULL NULL NULL NULL + +# LEFT SEMI JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +LEFT SEMI JOIN t2 ON k1 = k2 +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(k2@0, k1@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[10000] + +query II +SELECT sum(k1), sum(v1) +FROM t1 +LEFT SEMI JOIN t2 ON k1 = k2 +---- +NULL NULL + +# RIGHT SEMI JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +RIGHT SEMI JOIN t2 ON k1 = k2 +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(k2@0, k1@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[10000] + +query II +SELECT sum(k2), sum(v2) +FROM t1 +RIGHT SEMI JOIN t2 ON k1 = k2 +---- +NULL NULL + +# LEFT ANTI JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(k2@0, k1@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: 
partitions=1, partition_sizes=[10000] + +query II +SELECT sum(k1), sum(v1) +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +---- +450015000 30000 + +# RIGHT ANTI JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +RIGHT ANTI JOIN t2 ON k1 = k2 +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(k2@0, k1@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[10000] + +query II +SELECT sum(k2), sum(v2) +FROM t1 +RIGHT ANTI JOIN t2 ON k1 = k2 +---- +NULL NULL + +# FULL JOIN +query TT +EXPLAIN +SELECT * +FROM t1 +FULL JOIN t2 ON k1 = k2 +---- +physical_plan +01)ProjectionExec: expr=[k1@2 as k1, v1@3 as v1, k2@0 as k2, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=Full, on=[(k2@0, k1@0)] +04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)------DataSourceExec: partitions=1, partition_sizes=[10000] + +query IIII +SELECT sum(k1), sum(v1), sum(k2), sum(v2) +FROM t1 +FULL JOIN t2 ON k1 = k2 +---- +450015000 30000 NULL NULL + +# LEFT MARK JOIN +query TT +EXPLAIN +SELECT * +FROM t2 +WHERE k2 > 0 + OR EXISTS ( + SELECT * + FROM t1 + WHERE k2 = k1 + ) +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--FilterExec: k2@0 > 0 OR mark@2, projection=[k2@0, v2@1] +03)----CoalesceBatchesExec: target_batch_size=3 +04)------HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(k2@0, k1@0)] +05)--------DataSourceExec: partitions=1, partition_sizes=[0] +06)--------DataSourceExec: partitions=1, partition_sizes=[10000] + +query II +SELECT * +FROM t2 +WHERE k2 > 0 + OR EXISTS ( + SELECT * + FROM t1 + WHERE k2 = k1 + ) +---- + +# Projection inside the join (changes the output schema) +query TT +EXPLAIN +SELECT distinct(v1) +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +---- +physical_plan +01)AggregateExec: mode=Single, gby=[v1@0 as v1], aggr=[] +02)--CoalesceBatchesExec: 
target_batch_size=3 +03)----HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(k2@0, k1@0)], projection=[v1@1] +04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)------DataSourceExec: partitions=1, partition_sizes=[10000] + +query I +SELECT distinct(v1) +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +---- +1 + +# Both sides empty +query TT +EXPLAIN +SELECT * +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +WHERE k1 < 0 +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(k2@0, k1@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----CoalesceBatchesExec: target_batch_size=3 +05)------FilterExec: k1@0 < 0 +06)--------DataSourceExec: partitions=1, partition_sizes=[10000] + +query II +SELECT * +FROM t1 +LEFT ANTI JOIN t2 ON k1 = k2 +WHERE k1 < 0 +---- + + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; + +statement ok +set datafusion.explain.physical_plan_only = false; diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index b46d15cb962aa..77850c6ae7c5f 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -663,15 +663,14 @@ logical_plan physical_plan 01)GlobalLimitExec: skip=4, fetch=10 02)--SortPreservingMergeExec: [c@0 DESC], fetch=14 -03)----UnionExec -04)------SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true] +03)----SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true] +04)------UnionExec 05)--------ProjectionExec: expr=[CAST(c@0 AS Int64) as c] 06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], file_type=csv, has_header=true -08)------SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true] 
-09)--------ProjectionExec: expr=[CAST(d@0 AS Int64) as c] -10)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[d], file_type=csv, has_header=true +08)--------ProjectionExec: expr=[CAST(d@0 AS Int64) as c] +09)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[d], file_type=csv, has_header=true # Applying LIMIT & OFFSET to subquery. query III diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 56481936e726e..4f1e5ef39a00d 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -833,5 +833,12 @@ select column3[true] from tt; ---- 3 +# https://github.com/apache/datafusion/issues/16187 +# should be NULL in case of out of bounds for Null Type +query ? 
+select map_values(map([named_struct('a', 1, 'b', null)], [named_struct('a', 1, 'b', null)]))[0] as a; +---- +NULL + statement ok drop table tt; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 3fc90a6459f27..e3bcfcdbda1d5 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -1258,13 +1258,12 @@ logical_plan 08)--------TableScan: ordered_table projection=[a0, b, c, d] physical_plan 01)SortPreservingMergeExec: [d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], fetch=2 -02)--UnionExec -03)----SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[true] +03)----UnionExec 04)------ProjectionExec: expr=[b@1 as b, c@2 as c, a@0 as a, NULL as a0, d@3 as d] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true -06)----SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] -07)------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] -08)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true +06)------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] +07)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, d], output_ordering=[c@2 ASC NULLS LAST], 
file_type=csv, has_header=true # Test: run the query from above query IIIII diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 33bb052baa519..51e40e3e685d0 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -130,8 +130,7 @@ STORED AS PARQUET; ---- 3 -# Check output plan again, expect no "output_ordering" clause in the physical_plan -> ParquetExec, -# due to there being more files than partitions: +# Check output plan again query TT EXPLAIN SELECT int_col, string_col FROM test_table @@ -142,8 +141,7 @@ logical_plan 02)--TableScan: test_table projection=[int_col, string_col] physical_plan 01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] -02)--SortExec: expr=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/2.parquet]]}, projection=[int_col, string_col], file_type=parquet +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/2.parquet]]}, projection=[int_col, string_col], output_ordering=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], file_type=parquet # Perform queries using MIN and MAX diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index e94751548b222..24e76a570c009 100644 --- 
a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -113,6 +113,44 @@ physical_plan 02)--SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 > 2, pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 > 2, required_guarantees=[] +query T +select a from t where b = 2 ORDER BY b; +---- +bar + +query T +select a from t_pushdown where b = 2 ORDER BY b; +---- +bar + +query TT +EXPLAIN select a from t where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t.a +02)--Sort: t.b ASC NULLS LAST +03)----Filter: t.b = Int32(2) +04)------TableScan: t projection=[a, b], partial_filters=[t.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--ProjectionExec: expr=[a@0 as a] +03)----CoalesceBatchesExec: target_batch_size=8192 +04)------FilterExec: b@1 = 2 +05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 +06)----------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a, b], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + +query TT +EXPLAIN select a from t_pushdown where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t_pushdown.a +02)--Sort: t_pushdown.b ASC NULLS LAST +03)----Filter: t_pushdown.b = Int32(2) +04)------TableScan: t_pushdown projection=[a, b], partial_filters=[t_pushdown.b = Int32(2)] 
+physical_plan +01)CoalescePartitionsExec +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + # If we set the setting to `true` it override's the table's setting statement ok set datafusion.execution.parquet.pushdown_filters = true; @@ -161,6 +199,40 @@ physical_plan 02)--SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 > 2, pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 > 2, required_guarantees=[] +query T +select a from t where b = 2 ORDER BY b; +---- +bar + +query T +select a from t_pushdown where b = 2 ORDER BY b; +---- +bar + +query TT +EXPLAIN select a from t where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t.a +02)--Sort: t.b ASC NULLS LAST +03)----Filter: t.b = Int32(2) +04)------TableScan: t projection=[a, b], partial_filters=[t.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + +query TT +EXPLAIN 
select a from t_pushdown where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t_pushdown.a +02)--Sort: t_pushdown.b ASC NULLS LAST +03)----Filter: t_pushdown.b = Int32(2) +04)------TableScan: t_pushdown projection=[a, b], partial_filters=[t_pushdown.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + # If we reset the default the table created without pushdown goes back to disabling it statement ok set datafusion.execution.parquet.pushdown_filters = false; @@ -212,6 +284,44 @@ physical_plan 02)--SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 > 2, pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 > 2, required_guarantees=[] +query T +select a from t where b = 2 ORDER BY b; +---- +bar + +query T +select a from t_pushdown where b = 2 ORDER BY b; +---- +bar + +query TT +EXPLAIN select a from t where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t.a +02)--Sort: t.b ASC NULLS LAST +03)----Filter: t.b = Int32(2) +04)------TableScan: t projection=[a, b], partial_filters=[t.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--ProjectionExec: expr=[a@0 as a] +03)----CoalesceBatchesExec: target_batch_size=8192 +04)------FilterExec: b@1 = 2 +05)--------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=2 +06)----------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a, b], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + +query TT +EXPLAIN select a from t_pushdown where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t_pushdown.a +02)--Sort: t_pushdown.b ASC NULLS LAST +03)----Filter: t_pushdown.b = Int32(2) +04)------TableScan: t_pushdown projection=[a, b], partial_filters=[t_pushdown.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + # When filter pushdown *is* enabled, ParquetExec can filter exactly, # not just metadata, so we expect to see no FilterExec query T @@ -239,6 +349,23 @@ physical_plan 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 06)----------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a, b], file_type=parquet, predicate=b@1 > 2, pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 > 2, required_guarantees=[] +query T +select a from t_pushdown where b = 2 ORDER BY b; +---- 
+bar + +query TT +EXPLAIN select a from t_pushdown where b = 2 ORDER BY b; +---- +logical_plan +01)Projection: t_pushdown.a +02)--Sort: t_pushdown.b ASC NULLS LAST +03)----Filter: t_pushdown.b = Int32(2) +04)------TableScan: t_pushdown projection=[a, b], partial_filters=[t_pushdown.b = Int32(2)] +physical_plan +01)CoalescePartitionsExec +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], file_type=parquet, predicate=b@1 = 2, pruning_predicate=b_null_count@2 != row_count@3 AND b_min@0 <= 2 AND 2 <= b_max@1, required_guarantees=[b in (2)] + # also test querying on columns that are not in all the files query T select a from t_pushdown where b > 2 AND a IS NOT NULL order by a; diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index a10243f627209..fe909e70ffb00 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -38,20 +38,22 @@ CREATE TABLE src_table ( bigint_col BIGINT, date_col DATE, overlapping_col INT, - constant_col INT + constant_col INT, + nulls_first_col INT, + nulls_last_col INT ) AS VALUES -- first file -(1, 3, 'aaa', 100, 1, 0, 0), -(2, 2, 'bbb', 200, 2, 1, 0), -(3, 1, 'ccc', 300, 3, 2, 0), +(1, 3, 'aaa', 100, 1, 0, 0, NULL, 1), +(2, 2, 'bbb', 200, 2, 1, 0, NULL, 2), +(3, 1, 'ccc', 300, 3, 2, 0, 1, 3), -- second file -(4, 6, 'ddd', 400, 4, 0, 0), -(5, 5, 'eee', 500, 5, 1, 0), -(6, 4, 'fff', 600, 6, 2, 0), +(4, 6, 'ddd', 400, 4, 0, 0, 2, 4), +(5, 5, 'eee', 500, 5, 1, 0, 3, 5), +(6, 4, 'fff', 600, 6, 2, 0, 4, 6), -- third file -(7, 9, 'ggg', 700, 7, 3, 0), -(8, 8, 'hhh', 800, 8, 4, 0), -(9, 7, 'iii', 900, 9, 5, 0); +(7, 9, 'ggg', 700, 7, 3, 0, 
5, 7), +(8, 8, 'hhh', 800, 8, 4, 0, 6, NULL), +(9, 7, 'iii', 900, 9, 5, 0, 7, NULL); # Setup 3 files, in particular more files than there are partitions @@ -90,11 +92,18 @@ CREATE EXTERNAL TABLE test_table ( bigint_col BIGINT NOT NULL, date_col DATE NOT NULL, overlapping_col INT NOT NULL, - constant_col INT NOT NULL + constant_col INT NOT NULL, + nulls_first_col INT, + nulls_last_col INT ) STORED AS PARQUET PARTITIONED BY (partition_col) -WITH ORDER (int_col ASC NULLS LAST, bigint_col ASC NULLS LAST) +WITH ORDER ( + int_col ASC NULLS LAST, + bigint_col ASC NULLS LAST, + nulls_first_col ASC NULLS FIRST, + nulls_last_col ASC NULLS LAST +) LOCATION 'test_files/scratch/parquet_sorted_statistics/test_table'; # Order by numeric columns @@ -102,33 +111,33 @@ LOCATION 'test_files/scratch/parquet_sorted_statistics/test_table'; # DataFusion doesn't currently support string column statistics # This should not require a sort. query TT -EXPLAIN SELECT int_col, bigint_col +EXPLAIN SELECT int_col, bigint_col, nulls_first_col, nulls_last_col FROM test_table -ORDER BY int_col, bigint_col; +ORDER BY int_col, bigint_col, nulls_first_col NULLS FIRST, nulls_last_col NULLS LAST; ---- logical_plan -01)Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST -02)--TableScan: test_table projection=[int_col, bigint_col] +01)Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST, test_table.nulls_first_col ASC NULLS FIRST, test_table.nulls_last_col ASC NULLS LAST +02)--TableScan: test_table projection=[int_col, bigint_col, nulls_first_col, nulls_last_col] physical_plan -01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST] -02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[int_col, bigint_col], output_ordering=[int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST], file_type=parquet +01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST, nulls_first_col@2 ASC, nulls_last_col@3 ASC NULLS LAST] +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[int_col, bigint_col, nulls_first_col, nulls_last_col], output_ordering=[int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST, nulls_first_col@2 ASC, nulls_last_col@3 ASC NULLS LAST], file_type=parquet # Another planning test, but project on a column with unsupported statistics # We should be able to ignore this and look at only the relevant statistics query TT EXPLAIN SELECT string_col FROM test_table -ORDER BY int_col, bigint_col; +ORDER BY int_col, bigint_col, nulls_first_col NULLS FIRST, nulls_last_col NULLS LAST; ---- logical_plan 01)Projection: test_table.string_col -02)--Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST -03)----Projection: test_table.string_col, test_table.int_col, test_table.bigint_col -04)------TableScan: test_table projection=[int_col, string_col, bigint_col] +02)--Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST, test_table.nulls_first_col ASC NULLS FIRST, test_table.nulls_last_col ASC NULLS LAST +03)----Projection: test_table.string_col, test_table.int_col, test_table.bigint_col, test_table.nulls_first_col, test_table.nulls_last_col +04)------TableScan: test_table 
projection=[int_col, string_col, bigint_col, nulls_first_col, nulls_last_col] physical_plan 01)ProjectionExec: expr=[string_col@0 as string_col] -02)--SortPreservingMergeExec: [int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST] -03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[string_col, int_col, bigint_col], output_ordering=[int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST], file_type=parquet +02)--SortPreservingMergeExec: [int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST, nulls_first_col@3 ASC, nulls_last_col@4 ASC NULLS LAST] +03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[string_col, int_col, bigint_col, nulls_first_col, nulls_last_col], output_ordering=[int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST, nulls_first_col@3 ASC, nulls_last_col@4 ASC NULLS LAST], file_type=parquet # Clean up & recreate but sort on descending column statement ok diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt index d14b6ca81f67e..fcc12226e47c5 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt @@ -48,7 +48,7 @@ COPY 
aggregate_test_100_by_sql ### ## Setup test for datafusion ### -onlyif DataFusion +skipif postgres statement ok CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c1 VARCHAR NOT NULL, diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt index 25b4924715caa..4453aa1489a1b 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt @@ -49,7 +49,7 @@ COPY aggregate_test_100_by_sql ### ## Setup test for datafusion ### -onlyif DataFusion +skipif postgres statement ok CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c1 VARCHAR NOT NULL, diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt index e02c19016790d..f8e0770271309 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt @@ -46,7 +46,7 @@ COPY aggregate_test_100_by_sql ### ## Setup test for datafusion ### -onlyif DataFusion +skipif postgres statement ok CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c1 VARCHAR NOT NULL, diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt index edad3747a2030..f967d79a6d952 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt @@ -46,7 +46,7 @@ COPY aggregate_test_100_by_sql ### ## Setup test for datafusion ### -onlyif DataFusion +skipif postgres statement ok CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c1 VARCHAR NOT NULL, diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index a0d3193324628..6e2972d96a6f4 100644 --- 
a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -39,9 +39,9 @@ physical_plan 01)ProjectionExec: expr=[__unnest_placeholder(v.column2,depth=1)@0 as uc2] 02)--UnnestExec 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -04)------ProjectionExec: expr=[column2@1 as __unnest_placeholder(v.column2), column1@0 as column1] +04)------ProjectionExec: expr=[column2@0 as __unnest_placeholder(v.column2)] 05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------FilterExec: column1@0 = 2 +06)----------FilterExec: column1@0 = 2, projection=[column2@1] 07)------------DataSourceExec: partitions=1, partition_sizes=[1] query I @@ -59,10 +59,9 @@ physical_plan 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: __unnest_placeholder(v.column2,depth=1)@0 > 3 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -05)--------ProjectionExec: expr=[__unnest_placeholder(v.column2,depth=1)@0 as __unnest_placeholder(v.column2,depth=1)] -06)----------UnnestExec -07)------------ProjectionExec: expr=[column2@1 as __unnest_placeholder(v.column2), column1@0 as column1] -08)--------------DataSourceExec: partitions=1, partition_sizes=[1] +05)--------UnnestExec +06)----------ProjectionExec: expr=[column2@0 as __unnest_placeholder(v.column2)] +07)------------DataSourceExec: partitions=1, partition_sizes=[1] query II select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 AND column1 = 2; @@ -129,12 +128,31 @@ physical_plan 06)----------ProjectionExec: expr=[column1@0 as column1, column2@1 as __unnest_placeholder(d.column2)] 07)------------DataSourceExec: partitions=1, partition_sizes=[1] +statement ok +drop table d; + +statement ok +CREATE TABLE d AS VALUES (named_struct('a', 1, 'b', 2)), (named_struct('a', 3, 'b', 4)), (named_struct('a', 5, 'b', 6)); +query II +select * from (select unnest(column1) from d) where 
"__unnest_placeholder(d.column1).b" > 5; +---- +5 6 + +query TT +explain select * from (select unnest(column1) from d) where "__unnest_placeholder(d.column1).b" > 5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: __unnest_placeholder(d.column1).b@1 > 5 +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +04)------UnnestExec +05)--------ProjectionExec: expr=[column1@0 as __unnest_placeholder(d.column1)] +06)----------DataSourceExec: partitions=1, partition_sizes=[1] statement ok drop table d; - # Test push down filter with limit for parquet statement ok set datafusion.execution.parquet.pushdown_filters = true; diff --git a/datafusion/sqllogictest/test_files/regexp/regexp_instr.slt b/datafusion/sqllogictest/test_files/regexp/regexp_instr.slt new file mode 100644 index 0000000000000..d4e98e6431678 --- /dev/null +++ b/datafusion/sqllogictest/test_files/regexp/regexp_instr.slt @@ -0,0 +1,196 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Import common test data +include ./init_data.slt.part + +query I +SELECT regexp_instr('123123123123123', '(12)3'); +---- +1 + +query I +SELECT regexp_instr('123123123123', '123', 1); +---- +1 + +query I +SELECT regexp_instr('123123123123', '123', 3); +---- +4 + +query I +SELECT regexp_instr('123123123123', '123', 33); +---- +0 + +query I +SELECT regexp_instr('ABCABCABCABC', 'Abc', 1, 2, ''); +---- +0 + +query I +SELECT regexp_instr('ABCABCABCABC', 'Abc', 1, 2, 'i'); +---- +4 + +query I +SELECT + regexp_instr( + 'The quick brown fox jumps over the lazy dog.', + ' (quick) (brown) (fox)', + 1, + 1, + 'i', + 2 -- subexpression_number (2 for second group) + ); +---- +11 + +statement error +External error: query failed: DataFusion error: Arrow error: Compute error: regexp_instr() requires start to be 1 based +SELECT regexp_instr('123123123123', '123', 0); + +statement error +External error: query failed: DataFusion error: Arrow error: Compute error: regexp_instr() requires start to be 1 based +SELECT regexp_instr('123123123123', '123', -3); + +query I +SELECT regexp_instr(str, pattern) FROM regexp_test_data; +---- +NULL +1 +1 +0 +0 +0 +0 +1 +1 +1 +1 +1 + +query I +SELECT regexp_instr(str, pattern, start) FROM regexp_test_data; +---- +NULL +1 +1 +0 +0 +0 +0 +0 +3 +4 +1 +2 + + +statement ok +CREATE TABLE t_stringview AS +SELECT + arrow_cast(str, 'Utf8View') AS str, + arrow_cast(pattern, 'Utf8View') AS pattern, + arrow_cast(start, 'Int64') AS start +FROM regexp_test_data; + +query I +SELECT regexp_instr(str, pattern, start) FROM t_stringview; +---- +NULL +1 +1 +0 +0 +0 +0 +0 +3 +4 +1 +2 + +query I +SELECT regexp_instr( + arrow_cast(str, 'Utf8'), + arrow_cast(pattern, 'LargeUtf8'), + arrow_cast(start, 'Int32') +) FROM t_stringview; +---- +NULL +1 +1 +0 +0 +0 +0 +0 +3 +4 +1 +2 + +query I +SELECT regexp_instr(NULL, NULL); +---- +NULL + +query I +SELECT regexp_instr(NULL, 'a'); +---- +NULL + +query I +SELECT regexp_instr('a', NULL); +---- +NULL + +query I +SELECT 
regexp_instr('😀abcdef', 'abc'); +---- +2 + + +statement ok +CREATE TABLE empty_table (str varchar, pattern varchar, start int); + +query I +SELECT regexp_instr(str, pattern, start) FROM empty_table; +---- + +statement ok +INSERT INTO empty_table VALUES + ('a', NULL, 1), + (NULL, 'a', 1), + (NULL, NULL, 1), + (NULL, NULL, NULL); + +query I +SELECT regexp_instr(str, pattern, start) FROM empty_table; +---- +NULL +NULL +NULL +NULL + +statement ok +DROP TABLE t_stringview; + +statement ok +DROP TABLE empty_table; diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 9febf06b25103..109c2f209ad92 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1875,10 +1875,57 @@ drop table t; # test "user" column # See https://github.com/apache/datafusion/issues/14141 statement count 0 -create table t_with_user(a int, user text) as values (1,'test'), (2,null); +create table t_with_user(a int, user text) as values (1,'test'), (2,null), (3,'foo'); query T select t_with_user.user from t_with_user; ---- test NULL +foo + +query IT +select * from t_with_user where t_with_user.user = 'foo'; +---- +3 foo + +query T +select user from t_with_user; +---- +test +NULL +foo + +query IT +select * from t_with_user where user = 'foo'; +---- +3 foo + +# test "current_time" column +# See https://github.com/apache/datafusion/issues/14141 +statement count 0 +create table t_with_current_time(a int, current_time text) as values (1,'now'), (2,null), (3,'later'); + +# here it's clear that the column was meant +query B +select t_with_current_time.current_time is not null from t_with_current_time; +---- +true +false +true + +# here it's the function +query B +select current_time is not null from t_with_current_time; +---- +true +true +true + +# and here it's the column again +query B +select "current_time" is not null from t_with_current_time; +---- +true +false +true diff --git
a/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt b/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt index 29fb9ca11b0e0..da3dd9711b941 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt @@ -21,7 +21,99 @@ # For more information, please see: # https://github.com/apache/datafusion/issues/15914 -## Original Query: SELECT last_day('2009-01-12'); -## PySpark 3.5.5 Result: {'last_day(2009-01-12)': datetime.date(2009, 1, 31), 'typeof(last_day(2009-01-12))': 'date', 'typeof(2009-01-12)': 'string'} -#query -#SELECT last_day('2009-01-12'::string); +query D +SELECT last_day('2009-01-12'::DATE); +---- +2009-01-31 + + +query D +SELECT last_day('2015-02-28'::DATE); +---- +2015-02-28 + +query D +SELECT last_day('2015-03-27'::DATE); +---- +2015-03-31 + +query D +SELECT last_day('2015-04-26'::DATE); +---- +2015-04-30 + +query D +SELECT last_day('2015-05-25'::DATE); +---- +2015-05-31 + +query D +SELECT last_day('2015-06-24'::DATE); +---- +2015-06-30 + +query D +SELECT last_day('2015-07-23'::DATE); +---- +2015-07-31 + +query D +SELECT last_day('2015-08-01'::DATE); +---- +2015-08-31 + +query D +SELECT last_day('2015-09-02'::DATE); +---- +2015-09-30 + +query D +SELECT last_day('2015-10-03'::DATE); +---- +2015-10-31 + +query D +SELECT last_day('2015-11-04'::DATE); +---- +2015-11-30 + +query D +SELECT last_day('2015-12-05'::DATE); +---- +2015-12-31 + + +query D +SELECT last_day('2016-01-06'::DATE); +---- +2016-01-31 + +query D +SELECT last_day('2016-02-07'::DATE); +---- +2016-02-29 + + +query D +SELECT last_day(null::DATE); +---- +NULL + + +statement error Failed to coerce arguments to satisfy a call to 'last_day' function +select last_day('foo'); + + +statement error Failed to coerce arguments to satisfy a call to 'last_day' function +select last_day(123); + + +statement error 'last_day' does not support zero arguments +select last_day(); + +statement error Failed 
to coerce arguments to satisfy a call to 'last_day' function +select last_day(last_day('2016-02-07'::string, 'foo')); + +statement error Failed to coerce arguments to satisfy a call to 'last_day' function +select last_day(last_day('2016-02-31'::string)); + diff --git a/datafusion/sqllogictest/test_files/spark/math/hex.slt b/datafusion/sqllogictest/test_files/spark/math/hex.slt index 24db1a318358a..0fb8b92de02d4 100644 --- a/datafusion/sqllogictest/test_files/spark/math/hex.slt +++ b/datafusion/sqllogictest/test_files/spark/math/hex.slt @@ -38,3 +38,13 @@ SELECT hex(a) from VALUES ('foo'), (NULL), ('foobarbaz') AS t(a); 666F6F NULL 666F6F62617262617A + +statement ok +CREATE TABLE t_utf8view as VALUES (arrow_cast('foo', 'Utf8View')), (NULL), (arrow_cast('foobarbaz', 'Utf8View')); + +query T +SELECT hex(column1) FROM t_utf8view; +---- +666F6F +NULL +666F6F62617262617A diff --git a/datafusion/sqllogictest/test_files/spark/string/luhn_check.slt b/datafusion/sqllogictest/test_files/spark/string/luhn_check.slt index 389c34ef68ab9..ccb17323b24dc 100644 --- a/datafusion/sqllogictest/test_files/spark/string/luhn_check.slt +++ b/datafusion/sqllogictest/test_files/spark/string/luhn_check.slt @@ -15,23 +15,145 @@ # specific language governing permissions and limitations # under the License. -# This file was originally created by a porting script from: -# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function -# This file is part of the implementation of the datafusion-spark function library. 
-# For more information, please see: -# https://github.com/apache/datafusion/issues/15914 - -## Original Query: SELECT luhn_check('79927398713'); -## PySpark 3.5.5 Result: {'luhn_check(79927398713)': True, 'typeof(luhn_check(79927398713))': 'boolean', 'typeof(79927398713)': 'string'} -#query -#SELECT luhn_check('79927398713'::string); - -## Original Query: SELECT luhn_check('79927398714'); -## PySpark 3.5.5 Result: {'luhn_check(79927398714)': False, 'typeof(luhn_check(79927398714))': 'boolean', 'typeof(79927398714)': 'string'} -#query -#SELECT luhn_check('79927398714'::string); - -## Original Query: SELECT luhn_check('8112189876'); -## PySpark 3.5.5 Result: {'luhn_check(8112189876)': True, 'typeof(luhn_check(8112189876))': 'boolean', 'typeof(8112189876)': 'string'} -#query -#SELECT luhn_check('8112189876'::string); + +query B +SELECT luhn_check('79927398713'::string); +---- +true + + +query B +SELECT luhn_check('79927398714'::string); +---- +false + + +query B +SELECT luhn_check('8112189876'::string); +---- +true + +query B +select luhn_check('4111111111111111'::string); +---- +true + +query B +select luhn_check('5500000000000004'::string); +---- +true + +query B +select luhn_check('340000000000009'::string); +---- +true + +query B +select luhn_check('6011000000000004'::string); +---- +true + + +query B +select luhn_check('6011000000000005'::string); +---- +false + + +query B +select luhn_check('378282246310006'::string); +---- +false + + +query B +select luhn_check('0'::string); +---- +true + + +query B +select luhn_check('79927398713'::string) +---- +true + +query B +select luhn_check('4417123456789113'::string) +---- +true + +query B +select luhn_check('7992 7398 714'::string) +---- +false + +query B +select luhn_check('79927398714'::string) +---- +false + +query B +select luhn_check('4111111111111111 '::string) +---- +false + + +query B +select luhn_check('4111111 111111111'::string) +---- +false + +query B +select luhn_check(' 4111111111111111'::string) +---- 
+false + +query B +select luhn_check(''::string) +---- +false + +query B +select luhn_check(' ') +---- +false + + +query B +select luhn_check('510B105105105106'::string) +---- +false + + +query B +select luhn_check('ABCDED'::string) +---- +false + +query B +select luhn_check(null); +---- +NULL + +query B +select luhn_check(6011111111111117::BIGINT) +---- +true + + +query B +select luhn_check(6011111111111118::BIGINT) +---- +false + + +query B +select luhn_check(123.456::decimal(6,3)) +---- +false + +query B +SELECT luhn_check(a) FROM (VALUES ('79927398713'::string), ('79927398714'::string)) AS t(a); +---- +true +false diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 796570633f67c..ed73eecda03e2 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -210,9 +210,9 @@ physical_plan 08)--------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 09)----------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)] 10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)--------------------DataSourceExec: partitions=1, partition_sizes=[1] +11)--------------------DataSourceExec: partitions=1, partition_sizes=[2] 12)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)--------DataSourceExec: partitions=1, partition_sizes=[1] +13)--------DataSourceExec: partitions=1, partition_sizes=[2] query II rowsort SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1 @@ -245,9 +245,9 @@ physical_plan 08)--------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 09)----------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int * Float64(1))] 10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 
-11)--------------------DataSourceExec: partitions=1, partition_sizes=[1] +11)--------------------DataSourceExec: partitions=1, partition_sizes=[2] 12)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)--------DataSourceExec: partitions=1, partition_sizes=[1] +13)--------DataSourceExec: partitions=1, partition_sizes=[2] query IR rowsort SELECT t1_id, (SELECT sum(t2_int * 1.0) + 1 FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1 @@ -280,9 +280,9 @@ physical_plan 08)--------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 09)----------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)] 10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)--------------------DataSourceExec: partitions=1, partition_sizes=[1] +11)--------------------DataSourceExec: partitions=1, partition_sizes=[2] 12)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)--------DataSourceExec: partitions=1, partition_sizes=[1] +13)--------DataSourceExec: partitions=1, partition_sizes=[2] query II rowsort SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id group by t2_id, 'a') as t2_sum from t1 @@ -318,9 +318,9 @@ physical_plan 10)------------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 11)--------------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)] 12)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)------------------------DataSourceExec: partitions=1, partition_sizes=[1] +13)------------------------DataSourceExec: partitions=1, partition_sizes=[2] 14)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -15)--------DataSourceExec: partitions=1, partition_sizes=[1] +15)--------DataSourceExec: partitions=1, partition_sizes=[2] query II rowsort SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = 
t1.t1_id having sum(t2_int) < 3) as t2_sum from t1 @@ -1193,9 +1193,9 @@ physical_plan 02)--FilterExec: t1_id@0 > 40 OR NOT mark@3, projection=[t1_id@0, t1_name@1, t1_int@2] 03)----CoalesceBatchesExec: target_batch_size=2 04)------HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(t1_id@0, t2_id@0)] -05)--------DataSourceExec: partitions=1, partition_sizes=[1] +05)--------DataSourceExec: partitions=1, partition_sizes=[2] 06)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)----------DataSourceExec: partitions=1, partition_sizes=[1] +07)----------DataSourceExec: partitions=1, partition_sizes=[2] statement ok set datafusion.explain.logical_plan_only = true; diff --git a/datafusion/sqllogictest/test_files/subquery_sort.slt b/datafusion/sqllogictest/test_files/subquery_sort.slt index d993515f4de99..1e5a3c8f526ac 100644 --- a/datafusion/sqllogictest/test_files/subquery_sort.slt +++ b/datafusion/sqllogictest/test_files/subquery_sort.slt @@ -100,7 +100,7 @@ physical_plan 01)ProjectionExec: expr=[c1@0 as c1, r@1 as r] 02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[c1@0 as c1, rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as r, c3@1 as c3, c9@2 as c9] -04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Utf8View(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: 
"rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c1@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3, c9], file_type=csv, has_header=true @@ -126,7 +126,7 @@ physical_plan 01)ProjectionExec: expr=[c1@0 as c1, r@1 as r] 02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[c1@0 as c1, rank() ORDER BY [sink_table_with_utf8view.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as r, c3@1 as c3, c9@2 as c9] -04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table_with_utf8view.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() ORDER BY [sink_table_with_utf8view.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Utf8View(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table_with_utf8view.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() ORDER BY [sink_table_with_utf8view.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c1@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: partitions=1, partition_sizes=[1] diff --git 
a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 44d0f1f97d4d5..bff955d528ef7 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -176,6 +176,115 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01' 2000-01-01T01:01:01Z +########## +## cast tests +########## + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(0))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(0))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(3))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(3))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(6))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (SELECT 1 AS one)) AS 
t2, + (SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(6))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(9))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(9))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(0))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(0))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 
1970-01-01T00:00:01 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(3))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(3))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(6))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(6))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(9))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (VALUES 
(1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(9))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 + + ########## ## to_timestamp tests ########## @@ -394,12 +503,12 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08 12 query PPP SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3; ---- -1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 +1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3; ---- -1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 +1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999 query PPP SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3; @@ -409,24 +518,24 @@ SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as query PPP SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 +1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 
1970-01-01T00:00:00.123456789 # to_timestamp Decimal128 inputs query PPP SELECT to_timestamp(arrow_cast(1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 +1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(arrow_cast(-1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(-1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(-1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 +1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999 query PPP SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0.0, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(0.0, 'Decimal128(2,1)')::timestamp as c3; @@ -436,12 +545,12 @@ SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0 query PPP SELECT to_timestamp(arrow_cast(1.23456789, 'Decimal128(9,8)')) as c1, cast(arrow_cast(1.23456789, 'Decimal128(9,8)') as timestamp) as c2, arrow_cast(1.23456789, 'Decimal128(9,8)')::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(arrow_cast(123456789.123456789, 'Decimal128(18,9)')) as c1, cast(arrow_cast(123456789.123456789, 'Decimal128(18,9)') as timestamp) as c2, arrow_cast(123456789.123456789, 'Decimal128(18,9)')::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 +1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789 # from_unixtime @@ -3420,3 +3529,67 @@ select to_timestamp('-1'); query error DataFusion error: Arrow error: Parser error: 
Error parsing timestamp from '\-1': timestamp must contain at least 10 characters select to_timestamp(arrow_cast('-1', 'Utf8')); + +query P +SELECT CAST(CAST(1 AS decimal(17,2)) AS timestamp(3)) AS a UNION ALL +SELECT CAST(CAST(one AS decimal(17,2)) AS timestamp(3)) AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Nanosecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Nanosecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.000000001 +1970-01-01T00:00:00.000000001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Microsecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Microsecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.000001 +1970-01-01T00:00:00.000001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Millisecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Millisecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Second, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Second, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:01 +1970-01-01T00:00:01 + + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Nanosecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Nanosecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.000000001 +1970-01-01T00:00:00.000000001 + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Microsecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Microsecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.000001 +1970-01-01T00:00:00.000001 
+ +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Millisecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Millisecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Second, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Second, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:01 +1970-01-01T00:00:01 diff --git a/datafusion/sqllogictest/test_files/topk.slt b/datafusion/sqllogictest/test_files/topk.slt index 9ff382d32af95..afa78e43de2b5 100644 --- a/datafusion/sqllogictest/test_files/topk.slt +++ b/datafusion/sqllogictest/test_files/topk.slt @@ -53,7 +53,7 @@ query I select * from (select * from topk limit 8) order by x limit 3; ---- 0 -1 +2 2 diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index f901a4d373a31..6097444bc5569 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -413,15 +413,14 @@ logical_plan 06)------TableScan: aggregate_test_100 projection=[c1, c3] physical_plan 01)SortPreservingMergeExec: [c9@1 DESC], fetch=5 -02)--UnionExec -03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] +02)--SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] +03)----UnionExec 04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Decimal128(20, 0)) as c9] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true -07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] -08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9] 
-09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -10)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], file_type=csv, has_header=true +07)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9] +08)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], file_type=csv, has_header=true query TR SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5 @@ -522,7 +521,7 @@ physical_plan 16)----ProjectionExec: expr=[1 as cnt] 17)------PlaceholderRowExec 18)----ProjectionExec: expr=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt] -19)------BoundedWindowAggExec: wdw=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +19)------BoundedWindowAggExec: wdw=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 20)--------ProjectionExec: expr=[1 as c1] 21)----------PlaceholderRowExec @@ -916,19 +915,19 @@ physical_plan 03)----SortExec: expr=[y@0 ASC NULLS LAST], preserve_partitioning=[true] 04)------ProjectionExec: expr=[CAST(y@0 AS Int64) as y] 05)--------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 -06)----------DataSourceExec: partitions=1, partition_sizes=[1] +06)----------DataSourceExec: partitions=1, partition_sizes=[2] 07)----SortExec: expr=[y@0 ASC NULLS LAST], preserve_partitioning=[false] 08)------DataSourceExec: partitions=1, partition_sizes=[1] # optimize_subquery_sort in create_relation removes Sort so the result is not sorted. query I -SELECT * FROM v1; +SELECT * FROM v1 ORDER BY 1; ---- -20 -40 +1 3 3 -1 +20 +40 query TT explain SELECT * FROM v1; @@ -943,7 +942,7 @@ physical_plan 01)UnionExec 02)--ProjectionExec: expr=[CAST(y@0 AS Int64) as y] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -04)------DataSourceExec: partitions=1, partition_sizes=[1] +04)------DataSourceExec: partitions=1, partition_sizes=[2] 05)--DataSourceExec: partitions=1, partition_sizes=[1] statement count 0 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index c86921012f9bc..82de11302857a 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -360,7 +360,7 @@ physical_plan 02)--ProjectionExec: expr=[b@0 as b, max(d.a)@1 as max_a, max(d.seq)@2 as max(d.seq)] 03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[max(d.a), max(d.seq)], ordering_mode=Sorted 04)------ProjectionExec: expr=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] -05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), 
end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[b@1 ASC NULLS LAST, a@0 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([b@1], 4), input_partitions=4 @@ -1241,9 +1241,9 @@ logical_plan 05)--------TableScan: aggregate_test_100 projection=[c8, c9] physical_plan 01)ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2] -02)--BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----ProjectionExec: expr=[c9@1 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c8@0 ASC NULLS LAST], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], file_type=csv, has_header=true @@ -1263,8 +1263,8 @@ 
logical_plan physical_plan 01)ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 02)--WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], file_type=csv, has_header=true @@ -1287,9 +1287,9 @@ physical_plan 01)SortExec: expr=[c2@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER 
BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] -04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS 
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[false] -06)----------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 07)------------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], file_type=csv, has_header=true @@ -1311,12 +1311,12 @@ logical_plan 05)--------TableScan: aggregate_test_100 projection=[c1, c2, c4] physical_plan 01)ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 
ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] -02)--BoundedWindowAggExec: wdw=[count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 03)----SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 04)------CoalesceBatchesExec: target_batch_size=4096 05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 06)----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS 
BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] -07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 09)----------------CoalesceBatchesExec: target_batch_size=4096 10)------------------RepartitionExec: partitioning=Hash([c1@0, c2@1], 2), input_partitions=2 @@ -1343,8 +1343,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING 
AND 5 FOLLOWING@1 as sum2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 05)--------SortExec: 
expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -1384,8 +1384,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS 
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS 
FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -1427,9 +1427,9 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: 
{} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] -05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -1469,10 +1469,10 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 
ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------SortExec: expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] -05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 
1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 07)------------SortExec: expr=[c9@2 DESC, c1@0 DESC], preserve_partitioning=[false] 08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9], file_type=csv, has_header=true @@ -1553,17 +1553,17 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 
04)------ProjectionExec: expr=[c1@0 as c1, c3@2 as c3, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@4 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@6 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@7 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@8 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@9 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@10 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING@11 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@12 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@13 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@14 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@15 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@18 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -05)--------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, 
null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[c3@2 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] -07)------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 08)--------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 ASC], preserve_partitioning=[false] -09)----------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +09)----------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 10)------------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 DESC], preserve_partitioning=[false] 11)--------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(10)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }] 12)----------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 13)------------------------SortExec: expr=[c3@2 DESC NULLS LAST], preserve_partitioning=[false] 14)--------------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: 
Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] -15)----------------------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +15)----------------------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 16)------------------------------SortExec: expr=[c3@2 DESC, c1@0 ASC NULLS LAST], preserve_partitioning=[false] 17)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/null_cases.csv]]}, projection=[c1, c2, c3], file_type=csv, has_header=true @@ -1637,8 +1637,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@1 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: 
Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true @@ -1681,8 +1681,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@1 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { 
name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], 
mode=[Sorted] 05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true @@ -1729,7 +1729,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, c3@2 as c3, c9@3 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortPreservingMergeExec: [__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST] 07)------------SortExec: expr=[__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------ProjectionExec: expr=[c3@1 + c4@2 as __common_expr_1, c2@0 as c2, c3@1 as c3, c9@3 as c9] @@ -1822,13 +1822,13 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 02)--ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: 
Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c3@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 08)--------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -09)----------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +09)----------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 10)------------------SortExec: expr=[c3@1 DESC, c9@2 DESC, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 11)--------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], file_type=csv, has_header=true @@ -1864,7 +1864,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST] 02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] -03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -1993,7 +1993,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, rn1@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] -03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 04)------SortExec: expr=[c1@0 
ASC NULLS LAST], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -2019,10 +2019,10 @@ logical_plan physical_plan 01)SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c1@0 as c1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING@2 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2] -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------SortPreservingMergeExec: [c9@1 ASC NULLS LAST] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST], preserve_partitioning=[true] -06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER 
BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING], mode=[Sorted] 07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------CoalesceBatchesExec: target_batch_size=4096 09)----------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -2107,10 +2107,10 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, 
aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c9@3 as c9, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as 
sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING] 05)--------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 
+06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 07)------------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], file_type=csv, has_header=true @@ -2162,11 +2162,11 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@1 as c9, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC 
NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------ProjectionExec: expr=[c2@0 as c2, c9@2 as c9, c1_alias@3 as c1_alias, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as 
sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING] 05)--------WindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 06)----------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING] -07)------------BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field 
{ name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 08)--------------WindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 09)----------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 10)------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] @@ -2208,9 +2208,9 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2] 02)--SortExec: TopK(fetch=5), expr=[c9@2 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum1, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING@4 as sum2, c9@1 as c9] -04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)), is_causal: true }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING], mode=[Sorted] 05)--------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING] -06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, frame: GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], file_type=csv, has_header=true @@ -2244,7 +2244,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2281,7 +2281,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2318,7 +2318,7 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], 
mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2358,7 +2358,7 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST, c9@0 ASC NULLS LAST], preserve_partitioning=[false], sort_prefix=[rn1@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: 
expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2433,7 +2433,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2455,7 +2455,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 + 
aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Decimal128(None,21,0)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[CAST(c9@1 AS Decimal128(20, 0)) + CAST(c5@0 AS Decimal128(20, 0)) DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5, c9], file_type=csv, has_header=true @@ -2476,7 +2476,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, CAST(row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 AS Int64) as rn1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: 
false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -2581,10 +2581,10 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, sum3@2 as sum3, min1@3 as min1, min2@4 as min2, min3@5 as min3, max1@6 as max1, max2@7 as max2, max3@8 as max3, cnt1@9 as cnt1, cnt2@10 as cnt2, sumr1@11 as sumr1, sumr2@12 as sumr2, sumr3@13 as sumr3, minr1@14 as minr1, minr2@15 as minr2, minr3@16 as minr3, maxr1@17 as maxr1, maxr2@18 as maxr2, maxr3@19 as maxr3, cntr1@20 as cntr1, cntr2@21 as cntr2, sum4@22 as sum4, cnt3@23 as cnt3] 02)--SortExec: TopK(fetch=5), expr=[inc_col@24 DESC], preserve_partitioning=[false] 03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, 
min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, max(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@1 as inc_col] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 05)--------ProjectionExec: expr=[__common_expr_1@0 as 
__common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] 
RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) 
ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC 
NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 
FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)), is_causal: false }, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)), is_causal: false }, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)), 
is_causal: false }, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)), is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE 
BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: 
"max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 4 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 
1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { name: "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, 
max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { name: "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 2 PRECEDING AND 6 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 8 FOLLOWING], mode=[Sorted] 08)--------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] 09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, 
has_header=true @@ -2667,8 +2667,8 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, 
lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] -03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: 
UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: 
Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 
PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) 
ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "nth_value(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 
FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts 
DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS 
FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING], mode=[Sorted] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, has_header=true query IIIIIIIIIIIIIIIIIIIIIIIII @@ -2739,8 +2739,8 @@ physical_plan 01)ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as min2, max1@4 as max1, max2@5 as max2, count1@6 as count1, count2@7 as count2, avg1@8 as avg1, avg2@9 as avg2] 02)--SortExec: TopK(fetch=5), expr=[inc_col@10 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, 
avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 
FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, max(annotated_data_finite.inc_col) ORDER 
BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, 
min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "sum(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING], 
mode=[Sorted] 06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, has_header=true @@ -2791,8 +2791,8 @@ physical_plan 01)ProjectionExec: expr=[first_value1@0 as first_value1, first_value2@1 as first_value2, last_value1@2 as last_value1, last_value2@3 as last_value2, nth_value1@4 as nth_value1] 02)--SortExec: TopK(fetch=5), expr=[inc_col@5 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as first_value1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as first_value2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as last_value1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as last_value2, nth_value(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as nth_value1, inc_col@1 as inc_col] -04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, nth_value(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "nth_value(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS 
FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, nth_value(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "nth_value(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 
3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING], mode=[Sorted] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, has_header=true query IIIII @@ -2835,8 +2835,8 @@ logical_plan physical_plan 01)ProjectionExec: expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY 
[annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING], mode=[Sorted] 05)--------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] 06)----------StreamingTableExec: partition_sizes=1, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST] @@ -2880,8 +2880,8 @@ logical_plan physical_plan 01)ProjectionExec: 
expr=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as sum1, sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum2, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as count1, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as count2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY 
[annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: 
"sum(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "count(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING], mode=[Sorted] 05)--------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, ts@0 as ts, inc_col@1 as inc_col] 06)----------StreamingTableExec: partition_sizes=1, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST] @@ -2980,12 +2980,12 @@ logical_plan physical_plan 01)ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, 
annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: 
wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), is_causal: true }], mode=[Linear] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 
FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[PartiallySorted([1, 0])] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", 
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[PartiallySorted([0])] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[PartiallySorted([0, 1])] -08)--------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: 
Ok(Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING], mode=[Linear] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY 
[annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING], mode=[PartiallySorted([1, 0])] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER 
BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING], mode=[PartiallySorted([0])] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY 
[annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], mode=[PartiallySorted([0, 1])] +08)--------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING], 
mode=[Sorted] 09)----------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 10)------------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] @@ -3048,17 +3048,17 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] 
ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] -03)----BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) 
PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), is_causal: true }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 
PRECEDING AND 1 PRECEDING], mode=[Sorted] 04)------SortExec: expr=[d@4 ASC NULLS LAST, a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 
PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] -07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 08)--------------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] -09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", 
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING], mode=[Sorted] 10)------------------SortExec: expr=[a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] -11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] +11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY 
[annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], mode=[Sorted] 12)----------------------SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] -13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, 
annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] +13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 14)--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 15)----------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], file_type=csv, has_header=true @@ -3122,7 +3122,7 @@ physical_plan 01)ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ORDER 
BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as rn1] 02)--CoalesceBatchesExec: target_batch_size=4096, fetch=5 03)----FilterExec: row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 < 50 -04)------BoundedWindowAggExec: wdw=[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] # Top level sort is pushed down through BoundedWindowAggExec as its SUM result does already satisfy the required @@ -3144,7 +3144,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -3229,11 +3229,11 @@ logical_plan 08)--------------TableScan: annotated_data_infinite2 projection=[a, b, c, d] physical_plan 01)ProjectionExec: expr=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum3, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW@6 as sum4] -02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] +02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Linear] 03)----ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, a@1 as a, d@4 as d, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum(annotated_data_infinite2.a) PARTITION BY 
[annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[PartiallySorted([0])] -06)----------BoundedWindowAggExec: 
wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[PartiallySorted([0])] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION 
BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 07)------------ProjectionExec: expr=[CAST(a@0 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 08)--------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] @@ -3260,17 +3260,17 @@ logical_plan 08)--------------TableScan: annotated_data_infinite2 projection=[a, b, c, d] physical_plan 01)ProjectionExec: expr=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum3, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum4] -02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] +02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Linear] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------RepartitionExec: partitioning=Hash([d@2], 2), input_partitions=2, preserve_order=true, sort_exprs=__common_expr_1@0 ASC NULLS LAST, a@1 ASC NULLS LAST 05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, a@1 as a, d@4 as d, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum(annotated_data_infinite2.a) PARTITION BY 
[annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 07)------------CoalesceBatchesExec: target_batch_size=4096 08)--------------RepartitionExec: 
partitioning=Hash([b@2, a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST -09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[PartiallySorted([0])] +09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[PartiallySorted([0])] 10)------------------CoalesceBatchesExec: target_batch_size=4096 11)--------------------RepartitionExec: partitioning=Hash([a@1, d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST -12)----------------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY 
[annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +12)----------------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 13)------------------------CoalesceBatchesExec: target_batch_size=4096 14)--------------------------RepartitionExec: partitioning=Hash([a@1, b@2], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 15)----------------------------ProjectionExec: expr=[CAST(a@0 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] @@ -3329,7 +3329,7 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=5), expr=[c3@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c3@0 as c3, max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1] -03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] 06)----------WindowAggExec: wdw=[min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "min(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] @@ -3373,7 +3373,7 @@ physical_plan 
01)ProjectionExec: expr=[min1@0 as min1, max1@1 as max1] 02)--SortExec: TopK(fetch=5), expr=[c3@2 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3] -04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC 
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "min(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c12@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c12], file_type=csv, has_header=true @@ -3425,7 +3425,7 @@ logical_plan 02)--Filter: multiple_ordered_table.b = Int32(0) 03)----TableScan: multiple_ordered_table projection=[a0, a, b, c, d], partial_filters=[multiple_ordered_table.b = Int32(0)] physical_plan -01)BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +01)BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 02)--CoalesceBatchesExec: target_batch_size=4096 03)----FilterExec: b@2 = 0 04)------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC NULLS LAST]], file_type=csv, has_header=true @@ -3443,7 +3443,7 @@ logical_plan 02)--Filter: multiple_ordered_table.b = Int32(0) 03)----TableScan: multiple_ordered_table projection=[a0, a, b, c, d], partial_filters=[multiple_ordered_table.b = Int32(0)] physical_plan -01)BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +01)BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(multiple_ordered_table.a) ORDER BY [multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 02)--SortExec: expr=[d@4 ASC NULLS LAST], preserve_partitioning=[false] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------FilterExec: b@2 = 0 @@ -3480,9 +3480,9 @@ logical_plan 05)--------TableScan: multiple_ordered_table projection=[a, b, c, d] physical_plan 01)ProjectionExec: expr=[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW@3 as min1, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max1] -02)--BoundedWindowAggExec: wdw=[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "min(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----ProjectionExec: expr=[c@2 as c, d@3 as d, max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -04)------BoundedWindowAggExec: wdw=[max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(multiple_ordered_table.d) PARTITION BY 
[multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_orderings=[[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], file_type=csv, has_header=true query TT @@ -3499,7 +3499,7 @@ logical_plan 04)------TableScan: multiple_ordered_table projection=[c, d], partial_filters=[multiple_ordered_table.d = Int32(0)] physical_plan 01)ProjectionExec: expr=[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max_c] -02)--BoundedWindowAggExec: wdw=[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------FilterExec: d@1 = 0 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], output_ordering=[c@0 ASC NULLS LAST], file_type=csv, has_header=true @@ -3514,7 +3514,7 @@ logical_plan 03)----TableScan: multiple_ordered_table projection=[a, c, d] physical_plan 01)ProjectionExec: expr=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -02)--BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true query TT @@ -3527,7 +3527,7 @@ logical_plan 03)----TableScan: multiple_ordered_table projection=[a, b, c, d] physical_plan 01)ProjectionExec: expr=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -02)--BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_orderings=[[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], file_type=csv, has_header=true query I @@ -3620,7 +3620,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [c@3 ASC NULLS LAST] 02)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW@5 as avg_d] -03)----BoundedWindowAggExec: wdw=[avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW: Ok(Field { name: "avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: CurrentRow, is_causal: false 
}], mode=[Linear] +03)----BoundedWindowAggExec: wdw=[avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW: Field { name: "avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 10 PRECEDING AND CURRENT ROW], mode=[Linear] 04)------CoalesceBatchesExec: target_batch_size=4096 05)--------RepartitionExec: partitioning=Hash([d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -3955,7 +3955,7 @@ logical_plan 03)----TableScan: table_with_pk projection=[sn, ts, currency, amount] physical_plan 01)ProjectionExec: expr=[sn@0 as sn, ts@1 as ts, currency@2 as currency, amount@3 as amount, sum(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum1] -02)--BoundedWindowAggExec: wdw=[sum(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[sum(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(table_with_pk.amount) ORDER BY [table_with_pk.sn ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----SortExec: expr=[sn@0 ASC NULLS LAST], preserve_partitioning=[false] 04)------DataSourceExec: partitions=1, partition_sizes=[1] @@ -4076,7 +4076,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[c3@0 as c3, c4@1 as c4, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1] -05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(aggregate_test_100.c9) ORDER BY 
[aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[c3@0 + c4@1 DESC], preserve_partitioning=[false] 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c4, c9], file_type=csv, has_header=true @@ -4115,7 +4115,7 @@ logical_plan 04)------TableScan: a projection=[a] physical_plan 01)ProjectionExec: expr=[count(Int64(1)) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as count(*) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] -02)--BoundedWindowAggExec: wdw=[count(Int64(1)) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "count(Int64(1)) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[count(Int64(1)) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "count(Int64(1)) PARTITION BY [a.a] ORDER BY [a.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), 
input_partitions=1 @@ -4138,7 +4138,7 @@ logical_plan 04)------TableScan: a projection=[a] physical_plan 01)ProjectionExec: expr=[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] -02)--BoundedWindowAggExec: wdw=[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +02)--BoundedWindowAggExec: wdw=[row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] 03)----CoalesceBatchesExec: target_batch_size=4096 04)------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -4341,6 +4341,9 @@ LIMIT 5; 24 31 14 94 +statement ok +set datafusion.execution.batch_size = 100; + # Tests schema and data are in sync for mixed nulls and not nulls values for builtin window function query T select lag(a) over (order by a ASC NULLS FIRST) as x1 @@ -4938,11 +4941,11 @@ FROM (SELECT c1, c2, ROW_NUMBER() OVER() as rn FROM t LIMIT 5) GROUP BY rn -ORDER BY rn; +ORDER BY 1, 2, 3 ---- 1 a 1 -2 b 2 1 a 3 +2 b 2 3 NULL 4 NULL a4 5 @@ -5181,6 +5184,10 @@ order by c1; 3 1 1 3 10 2 + +statement ok +set datafusion.execution.batch_size = 1; + # push filter since it uses a partition column query TT explain 
select c1, c2, rank @@ -5200,7 +5207,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, rank@2 ASC NULLS LAST] 02)--ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank] -03)----BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=1 06)----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5244,7 +5251,7 @@ physical_plan 02)--ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank] 03)----CoalesceBatchesExec: target_batch_size=1 04)------FilterExec: c2@1 >= 10 -05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC 
NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=1 08)--------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5286,7 +5293,7 @@ physical_plan 02)--ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank] 03)----CoalesceBatchesExec: target_batch_size=1 04)------FilterExec: c2@1 = 10 -05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[c2@1 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=1 08)--------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5327,7 +5334,7 @@ physical_plan 02)--ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank] 03)----CoalesceBatchesExec: target_batch_size=1 04)------FilterExec: c1@0 = 1 OR c2@1 = 10 -05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +05)--------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=1 08)--------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5370,11 +5377,11 @@ physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, 
rank1@2 ASC NULLS LAST, rank2@3 ASC NULLS LAST] 02)--SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, rank1@2 ASC NULLS LAST, rank2@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank1, rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as rank2] -04)------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c2@1 ASC NULLS LAST, c1@0 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=1 07)------------RepartitionExec: partitioning=Hash([c2@1, c1@0], 2), input_partitions=2 -08)--------------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, 
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +08)--------------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 09)----------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 10)------------------CoalesceBatchesExec: target_batch_size=1 11)--------------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5421,13 +5428,13 @@ physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, rank1@2 ASC NULLS LAST, rank2@3 ASC NULLS LAST] 02)--SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, rank1@2 ASC NULLS LAST, rank2@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rank1, rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as rank2] -04)------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false 
}], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c2, t1.c1] ORDER BY [t1.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 05)--------SortExec: expr=[c2@1 ASC NULLS LAST, c1@0 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=1 07)------------RepartitionExec: partitioning=Hash([c2@1, c1@0], 2), input_partitions=2 08)--------------CoalesceBatchesExec: target_batch_size=1 09)----------------FilterExec: c2@1 > 1 -10)------------------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] +10)------------------BoundedWindowAggExec: wdw=[rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() PARTITION BY [t1.c1] ORDER BY [t1.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 11)--------------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 12)----------------------CoalesceBatchesExec: target_batch_size=1 13)------------------------RepartitionExec: 
partitioning=Hash([c1@0], 2), input_partitions=2 @@ -5507,6 +5514,7 @@ physical_plan 02)--WindowAggExec: wdw=[sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100_ordered.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true + query TT EXPLAIN SELECT c1, MIN(c5) OVER(PARTITION BY c1) as min_c5 FROM aggregate_test_100_ordered ORDER BY c1, min_c5 DESC NULLS LAST; ---- diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/aggregate_function.rs b/datafusion/substrait/src/logical_plan/consumer/expr/aggregate_function.rs index 114fe1e7aecd5..62e140acc27b3 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/aggregate_function.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/aggregate_function.rs @@ -30,7 +30,7 @@ pub async fn from_substrait_agg_func( f: &AggregateFunction, input_schema: &DFSchema, filter: Option>, - order_by: Option>, + order_by: Vec, distinct: bool, ) -> datafusion::common::Result> { let Some(fn_signature) = consumer diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/aggregate_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/aggregate_rel.rs index 9421bb17c1628..c919bd038936d 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/aggregate_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/aggregate_rel.rs @@ -88,14 +88,8 @@ pub async fn from_aggregate_rel( _ if f.invocation == AggregationInvocation::All as i32 => false, _ => false, }; - let order_by = if !f.sorts.is_empty() { - Some( - from_substrait_sorts(consumer, 
&f.sorts, input.schema()) - .await?, - ) - } else { - None - }; + let order_by = + from_substrait_sorts(consumer, &f.sorts, input.schema()).await?; from_substrait_agg_func( consumer, diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs index a91366e47742d..25c66a8e22972 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::logical_plan::consumer::utils::requalify_sides_if_needed; use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; + +use datafusion::logical_expr::requalify_sides_if_needed; + use substrait::proto::CrossRel; pub async fn from_cross_rel( @@ -30,6 +32,6 @@ pub async fn from_cross_rel( let right = LogicalPlanBuilder::from( consumer.consume_rel(cross.right.as_ref().unwrap()).await?, ); - let (left, right) = requalify_sides_if_needed(left, right)?; + let (left, right, _requalified) = requalify_sides_if_needed(left, right)?; left.cross_join(right.build()?)?.build() } diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs index 0cf920dd6260b..ade8a4e77e65a 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-use crate::logical_plan::consumer::utils::requalify_sides_if_needed; use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::common::{not_impl_err, plan_err, Column, JoinType, NullEquality}; +use datafusion::logical_expr::requalify_sides_if_needed; use datafusion::logical_expr::utils::split_conjunction; use datafusion::logical_expr::{ BinaryExpr, Expr, LogicalPlan, LogicalPlanBuilder, Operator, }; + use substrait::proto::{join_rel, JoinRel}; pub async fn from_join_rel( @@ -38,7 +39,7 @@ pub async fn from_join_rel( let right = LogicalPlanBuilder::from( consumer.consume_rel(join.right.as_ref().unwrap()).await?, ); - let (left, right) = requalify_sides_if_needed(left, right)?; + let (left, right, _requalified) = requalify_sides_if_needed(left, right)?; let join_type = from_substrait_jointype(join.r#type)?; // The join condition expression needs full input schema and not the output schema from join since we lose columns from diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/read_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/read_rel.rs index f1cbd16d2d8f2..3ea318b214631 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/read_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/read_rel.rs @@ -114,14 +114,37 @@ pub async fn from_read_rel( .await } Some(ReadType::VirtualTable(vt)) => { - if vt.values.is_empty() { + if vt.values.is_empty() && vt.expressions.is_empty() { return Ok(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: DFSchemaRef::new(substrait_schema), })); } - let values = vt + let values = if !vt.expressions.is_empty() { + let mut exprs = vec![]; + for row in &vt.expressions { + let mut name_idx = 0; + let mut row_exprs = vec![]; + for expression in &row.fields { + name_idx += 1; + let expr = consumer + .consume_expression(expression, &DFSchema::empty()) + .await?; + row_exprs.push(expr); + } + if name_idx != named_struct.names.len() { + return substrait_err!( + 
"Names list must match exactly to nested schema, but found {} uses for {} names", + name_idx, + named_struct.names.len() + ); + } + exprs.push(row_exprs); + } + exprs + } else { + vt .values .iter() .map(|row| { @@ -148,7 +171,8 @@ pub async fn from_read_rel( } Ok(lits) }) - .collect::>()?; + .collect::>()? + }; Ok(LogicalPlan::Values(Values { schema: DFSchemaRef::new(substrait_schema), diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index 4fc7a92804b4e..80300af24ac4a 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -22,7 +22,8 @@ use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, - DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, + DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, @@ -177,24 +178,32 @@ pub fn from_substrait_type( let value_type = map.value.as_ref().ok_or_else(|| { substrait_datafusion_err!("Map type must have value type") })?; - let key_field = Arc::new(Field::new( - "key", - from_substrait_type(consumer, key_type, dfs_names, name_idx)?, - false, - )); - let value_field = Arc::new(Field::new( - "value", - from_substrait_type(consumer, value_type, dfs_names, name_idx)?, - true, - )); - Ok(DataType::Map( - Arc::new(Field::new_struct( - "entries", - [key_field, value_field], - false, // The inner map field is always non-nullable (Arrow #1697), + let key_type = + from_substrait_type(consumer, key_type, dfs_names, 
name_idx)?; + let value_type = + from_substrait_type(consumer, value_type, dfs_names, name_idx)?; + + match map.type_variation_reference { + DEFAULT_MAP_TYPE_VARIATION_REF => { + let key_field = Arc::new(Field::new("key", key_type, false)); + let value_field = Arc::new(Field::new("value", value_type, true)); + Ok(DataType::Map( + Arc::new(Field::new_struct( + "entries", + [key_field, value_field], + false, // The inner map field is always non-nullable (Arrow #1697), + )), + false, // whether keys are sorted + )) + } + DICTIONARY_MAP_TYPE_VARIATION_REF => Ok(DataType::Dictionary( + Box::new(key_type), + Box::new(value_type), )), - false, // whether keys are sorted - )) + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), + } } r#type::Kind::Decimal(d) => match d.type_variation_reference { DECIMAL_128_TYPE_VARIATION_REF => { diff --git a/datafusion/substrait/src/logical_plan/consumer/utils.rs b/datafusion/substrait/src/logical_plan/consumer/utils.rs index b546ec3b1d908..67215e8e343e9 100644 --- a/datafusion/substrait/src/logical_plan/consumer/utils.rs +++ b/datafusion/substrait/src/logical_plan/consumer/utils.rs @@ -19,10 +19,10 @@ use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit, UnionFields}; use datafusion::common::{ exec_err, not_impl_err, substrait_datafusion_err, substrait_err, DFSchema, - DFSchemaRef, TableReference, + DFSchemaRef, }; use datafusion::logical_expr::expr::Sort; -use datafusion::logical_expr::{Cast, Expr, ExprSchemable, LogicalPlanBuilder}; +use datafusion::logical_expr::{Cast, Expr, ExprSchemable}; use std::collections::HashSet; use std::sync::Arc; use substrait::proto::sort_field::SortDirection; @@ -36,33 +36,6 @@ use substrait::proto::SortField; // https://github.com/apache/arrow-rs/blob/ee5694078c86c8201549654246900a4232d531a9/arrow-cast/src/cast/mod.rs#L1749). 
pub(super) const DEFAULT_TIMEZONE: &str = "UTC"; -/// (Re)qualify the sides of a join if needed, i.e. if the columns from one side would otherwise -/// conflict with the columns from the other. -/// Substrait doesn't currently allow specifying aliases, neither for columns nor for tables. For -/// Substrait the names don't matter since it only refers to columns by indices, however DataFusion -/// requires columns to be uniquely identifiable, in some places (see e.g. DFSchema::check_names). -pub(super) fn requalify_sides_if_needed( - left: LogicalPlanBuilder, - right: LogicalPlanBuilder, -) -> datafusion::common::Result<(LogicalPlanBuilder, LogicalPlanBuilder)> { - let left_cols = left.schema().columns(); - let right_cols = right.schema().columns(); - if left_cols.iter().any(|l| { - right_cols.iter().any(|r| { - l == r || (l.name == r.name && (l.relation.is_none() || r.relation.is_none())) - }) - }) { - // These names have no connection to the original plan, but they'll make the columns - // (mostly) unique. - Ok(( - left.alias(TableReference::bare("left"))?, - right.alias(TableReference::bare("right"))?, - )) - } else { - Ok((left, right)) - } -} - pub(super) fn next_struct_field_name( column_idx: usize, dfs_names: &[String], diff --git a/datafusion/substrait/src/logical_plan/producer/expr/aggregate_function.rs b/datafusion/substrait/src/logical_plan/producer/expr/aggregate_function.rs index 0619b497532d8..1e79897a1b770 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/aggregate_function.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/aggregate_function.rs @@ -43,14 +43,10 @@ pub fn from_aggregate_function( null_treatment: _null_treatment, }, } = agg_fn; - let sorts = if let Some(order_by) = order_by { - order_by - .iter() - .map(|expr| to_substrait_sort_field(producer, expr, schema)) - .collect::>>()? 
- } else { - vec![] - }; + let sorts = order_by + .iter() + .map(|expr| to_substrait_sort_field(producer, expr, schema)) + .collect::>>()?; let mut arguments: Vec = vec![]; for arg in args { arguments.push(FunctionArgument { diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 0c9266347529d..d819c2042c08a 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -21,7 +21,8 @@ use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, - DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, + DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, @@ -276,13 +277,25 @@ pub(crate) fn to_substrait_type( kind: Some(r#type::Kind::Map(Box::new(r#type::Map { key: Some(Box::new(key_type)), value: Some(Box::new(value_type)), - type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF, + type_variation_reference: DEFAULT_MAP_TYPE_VARIATION_REF, nullability, }))), }) } _ => plan_err!("Map fields must contain a Struct with exactly 2 fields"), }, + DataType::Dictionary(key_type, value_type) => { + let key_type = to_substrait_type(key_type, nullable)?; + let value_type = to_substrait_type(value_type, nullable)?; + Ok(substrait::proto::Type { + kind: Some(r#type::Kind::Map(Box::new(r#type::Map { + key: Some(Box::new(key_type)), + value: Some(Box::new(value_type)), + type_variation_reference: DICTIONARY_MAP_TYPE_VARIATION_REF, + nullability, + }))), + }) + } DataType::Struct(fields) => { let 
field_types = fields .iter() @@ -407,6 +420,10 @@ mod tests { .into(), false, ))?; + round_trip_type(DataType::Dictionary( + Box::new(DataType::Utf8), + Box::new(DataType::Int32), + ))?; round_trip_type(DataType::Struct( vec![ diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index 74fc6035efae7..a967e7d5ae482 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -55,6 +55,8 @@ pub const TIME_64_TYPE_VARIATION_REF: u32 = 1; pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1; pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2; +pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0; +pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1; pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0; pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1; /// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`]. diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 4a121e41d27e7..4d82f0fbd0213 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -519,6 +519,21 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_expressions_in_virtual_table() -> Result<()> { + let plan_str = + test_plan_to_string("virtual_table_with_expressions.substrait.json").await?; + + assert_snapshot!( + plan_str, + @r#" + Projection: dummy1 AS result1, dummy2 AS result2 + Values: (Int64(0), Utf8("temp")), (Int64(1), Utf8("test")) + "# + ); + Ok(()) + } + #[tokio::test] async fn test_multiple_joins() -> Result<()> { let plan_str = test_plan_to_string("multiple_joins.json").await?; @@ -584,4 +599,33 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_join_with_expression_key() -> Result<()> { + let plan_str = 
test_plan_to_string("join_with_expression_key.json").await?; + assert_snapshot!( + plan_str, + @r#" + Projection: left.index_name AS index, right.upper(host) AS host, left.max(size_bytes) AS idx_size, right.max(total_bytes) AS db_size, CAST(left.max(size_bytes) AS Float64) / CAST(right.max(total_bytes) AS Float64) * Float64(100) AS pct_of_db + Inner Join: left.upper(host) = right.upper(host) + SubqueryAlias: left + Aggregate: groupBy=[[index_name, upper(host)]], aggr=[[max(size_bytes)]] + Projection: size_bytes, index_name, upper(host) + Filter: index_name = Utf8("aaa") + Values: (Utf8("aaa"), Utf8("host-a"), Int64(128)), (Utf8("bbb"), Utf8("host-b"), Int64(256)) + SubqueryAlias: right + Aggregate: groupBy=[[upper(host)]], aggr=[[max(total_bytes)]] + Projection: total_bytes, upper(host) + Inner Join: Filter: upper(host) = upper(host) + Values: (Utf8("host-a"), Int64(107)), (Utf8("host-b"), Int64(214)) + Projection: upper(host) + Aggregate: groupBy=[[index_name, upper(host)]], aggr=[[max(size_bytes)]] + Projection: size_bytes, index_name, upper(host) + Filter: index_name = Utf8("aaa") + Values: (Utf8("aaa"), Utf8("host-a"), Int64(128)), (Utf8("bbb"), Utf8("host-b"), Int64(256)) + "# + ); + + Ok(()) + } } diff --git a/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json b/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json new file mode 100644 index 0000000000000..73fa06eea5f05 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json @@ -0,0 +1,814 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + 
"functionAnchor": 1, + "name": "upper:str" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "max:i64" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "multiply:fp64_fp64" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "divide:fp64_fp64" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [5, 6, 7, 8, 9] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4, 5] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["index_name", "host", "size_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "aaa", + "nullable": true + }, { + "string": "host-a", + "nullable": true + }, { + "i64": "128", + "nullable": true + }] + }, { + "fields": [{ + "string": "bbb", + "nullable": true + }, { + "string": "host-b", + "nullable": true + }, { + "i64": "256", + "nullable": true + }] + }] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "string": "aaa" + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + 
"structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "right": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["host", "total_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "host-a", + "nullable": true + }, { + "i64": "107", + "nullable": true + }] + }, { + "fields": [{ + "string": "host-b", + "nullable": true + }, { + "i64": "214", + "nullable": true + }] + }] + } + } + }, + "right": { + 
"project": { + "common": { + "emit": { + "outputMapping": [3] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4, 5] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["index_name", "host", "size_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "aaa", + "nullable": true + }, { + "string": "host-a", + "nullable": true + }, { + "i64": "128", + "nullable": true + }] + }, { + "fields": [{ + "string": "bbb", + "nullable": true + }, { + "string": "host-b", + "nullable": true + }, { + "i64": "256", + "nullable": true + }] + }] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "string": "aaa" + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + 
"groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }] + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + 
"field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }] + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 4, + "outputType": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": 
"FAILURE_BEHAVIOR_THROW_EXCEPTION" + } + } + }, { + "value": { + "cast": { + "type": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_THROW_EXCEPTION" + } + } + }] + } + } + }, { + "value": { + "literal": { + "fp64": 100.0 + } + } + }] + } + }] + } + }, + "names": ["index", "host", "idx_size", "db_size", "pct_of_db"] + } + }] +} \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/virtual_table_with_expressions.substrait.json b/datafusion/substrait/tests/testdata/test_plans/virtual_table_with_expressions.substrait.json new file mode 100644 index 0000000000000..2c634fa957579 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/virtual_table_with_expressions.substrait.json @@ -0,0 +1,75 @@ +{ + "relations": [ + { + "root": { + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": [ + "dummy1", "dummy2" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "expressions": [ + { + "fields": [ + { + "literal": { + "i64": "0", + "nullable": false + } + }, + { + "literal": { + "string": "temp", + "nullable": false + } + } + ] + }, + { + "fields": [ + { + "literal": { + "i64": "1", + "nullable": false + } + }, + { + "literal": { + "string": "test", + "nullable": false + } + } + ] + } + ] + } + } + }, + "names": [ + "result1", "result2" + ] + } + } + ] + } \ No newline at end of file diff --git a/dev/changelog/49.0.0.md b/dev/changelog/49.0.0.md new file mode 100644 index 0000000000000..239c7c9dfc973 --- /dev/null +++ b/dev/changelog/49.0.0.md @@ -0,0 +1,387 @@ + + +# Apache DataFusion 49.0.0 Changelog + +This release consists of 253 
commits from 71 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. + +**Breaking changes:** + +- feat: add metadata to literal expressions [#16170](https://github.com/apache/datafusion/pull/16170) (timsaucer) +- [MAJOR] Equivalence System Overhaul [#16217](https://github.com/apache/datafusion/pull/16217) (ozankabak) +- remove unused methods in SortExec [#16457](https://github.com/apache/datafusion/pull/16457) (adriangb) +- Move Pruning Logic to a Dedicated datafusion-pruning Crate for Improved Modularity [#16549](https://github.com/apache/datafusion/pull/16549) (kosiew) +- Fix type of ExecutionOptions::time_zone [#16569](https://github.com/apache/datafusion/pull/16569) (findepi) +- Convert Option> to Vec [#16615](https://github.com/apache/datafusion/pull/16615) (ViggoC) +- Refactor error handling to use boxed errors for DataFusionError variants [#16672](https://github.com/apache/datafusion/pull/16672) (kosiew) +- Reuse Rows allocation in RowCursorStream [#16647](https://github.com/apache/datafusion/pull/16647) (Dandandan) +- refactor: shrink `SchemaError` [#16653](https://github.com/apache/datafusion/pull/16653) (crepererum) +- Remove unused AggregateUDF struct [#16683](https://github.com/apache/datafusion/pull/16683) (ViggoC) +- Bump the MSRV to `1.85.1` due to transitive dependencies (`aws-sdk`) [#16728](https://github.com/apache/datafusion/pull/16728) (rtyler) + +**Performance related:** + +- Add late pruning of Parquet files based on file level statistics [#16014](https://github.com/apache/datafusion/pull/16014) (adriangb) +- Add fast paths for try_process_unnest [#16389](https://github.com/apache/datafusion/pull/16389) (simonvandel) +- Set the default value of `datafusion.execution.collect_statistics` to `true` [#16447](https://github.com/apache/datafusion/pull/16447) (AdamGS) +- Perf: 
Optimize CursorValues compare performance for StringViewArray (1.4X faster for sort-tpch Q11) [#16509](https://github.com/apache/datafusion/pull/16509) (zhuqi-lucas) +- Simplify predicates in `PushDownFilter` optimizer rule [#16362](https://github.com/apache/datafusion/pull/16362) (xudong963) +- optimize `ScalarValue::to_array_of_size` for structural types [#16706](https://github.com/apache/datafusion/pull/16706) (ding-young) +- Refactor filter pushdown APIs to enable joins to pass through filters [#16732](https://github.com/apache/datafusion/pull/16732) (adriangb) +- perf: Optimize hash joins with an empty build side [#16716](https://github.com/apache/datafusion/pull/16716) (nuno-faria) +- Per file filter evaluation [#15057](https://github.com/apache/datafusion/pull/15057) (adriangb) + +**Implemented enhancements:** + +- feat: Support defining custom MetricValues in PhysicalPlans [#16195](https://github.com/apache/datafusion/pull/16195) (sfluor) +- feat: Allow cancelling of grouping operations which are CPU bound [#16196](https://github.com/apache/datafusion/pull/16196) (zhuqi-lucas) +- feat: support FixedSizeList for array_has [#16333](https://github.com/apache/datafusion/pull/16333) (chenkovsky) +- feat: Support tpch and tpch10 benchmark for csv format [#16373](https://github.com/apache/datafusion/pull/16373) (zhuqi-lucas) +- feat: Support RightMark join for NestedLoop and Hash join [#16083](https://github.com/apache/datafusion/pull/16083) (jonathanc-n) +- feat: mapping sql Char/Text/String default to Utf8View [#16290](https://github.com/apache/datafusion/pull/16290) (zhuqi-lucas) +- feat: support fixed size list for array reverse [#16423](https://github.com/apache/datafusion/pull/16423) (chenkovsky) +- feat: add SchemaProvider::table_type(table_name: &str) [#16401](https://github.com/apache/datafusion/pull/16401) (epgif) +- feat: derive `Debug` and `Clone` for `ScalarFunctionArgs` [#16471](https://github.com/apache/datafusion/pull/16471) (crepererum) +- feat: 
support `map_entries` builtin function [#16557](https://github.com/apache/datafusion/pull/16557) (comphead) +- feat: add `array_min` scalar function and associated tests [#16574](https://github.com/apache/datafusion/pull/16574) (dharanad) +- feat: Finalize support for `RightMark` join + `Mark` join swap [#16488](https://github.com/apache/datafusion/pull/16488) (jonathanc-n) +- feat: Parquet modular encryption [#16351](https://github.com/apache/datafusion/pull/16351) (corwinjoy) +- feat: Support `u32` indices for `HashJoinExec` [#16434](https://github.com/apache/datafusion/pull/16434) (jonathanc-n) +- feat: expose intersect distinct/except distinct in dataframe api [#16578](https://github.com/apache/datafusion/pull/16578) (chenkovsky) +- feat: Add a configuration to make parquet encryption optional [#16649](https://github.com/apache/datafusion/pull/16649) (corwinjoy) + +**Fixed bugs:** + +- fix: preserve null_equals_null flag in eliminate_cross_join rule [#16356](https://github.com/apache/datafusion/pull/16356) (waynexia) +- fix: Fix SparkSha2 to be compliant with Spark response and add support for Int32 [#16350](https://github.com/apache/datafusion/pull/16350) (rishvin) +- fix: Fixed error handling for `generate_series/range` [#16391](https://github.com/apache/datafusion/pull/16391) (jonathanc-n) +- fix: Enable WASM compilation by making sqlparser's recursive-protection optional [#16418](https://github.com/apache/datafusion/pull/16418) (jonmmease) +- fix: create file for empty stream [#16342](https://github.com/apache/datafusion/pull/16342) (chenkovsky) +- fix: document and fix macro hygiene for `config_field!` [#16473](https://github.com/apache/datafusion/pull/16473) (crepererum) +- fix: make `with_new_state` a trait method for `ExecutionPlan` [#16469](https://github.com/apache/datafusion/pull/16469) (geoffreyclaude) +- fix: column indices in FFI partition evaluator [#16480](https://github.com/apache/datafusion/pull/16480) (timsaucer) +- fix: support within_group 
[#16538](https://github.com/apache/datafusion/pull/16538) (chenkovsky) +- fix: disallow specify both order_by and within_group [#16606](https://github.com/apache/datafusion/pull/16606) (watchingthewheelsgo) +- fix: format within_group error message [#16613](https://github.com/apache/datafusion/pull/16613) (watchingthewheelsgo) +- fix: reserved keywords in qualified column names [#16584](https://github.com/apache/datafusion/pull/16584) (crepererum) +- fix: support scalar function nested in get_field in Unparser [#16610](https://github.com/apache/datafusion/pull/16610) (chenkovsky) +- fix: sqllogictest runner label condition mismatch [#16633](https://github.com/apache/datafusion/pull/16633) (lliangyu-lin) +- fix: port arrow inline fast key fix to datafusion [#16698](https://github.com/apache/datafusion/pull/16698) (zhuqi-lucas) +- fix: try to lower plain reserved functions to columns as well [#16669](https://github.com/apache/datafusion/pull/16669) (crepererum) +- fix: Fix CI failing due to #16686 [#16718](https://github.com/apache/datafusion/pull/16718) (jonathanc-n) +- fix: return NULL if any of the param to make_date is NULL [#16759](https://github.com/apache/datafusion/pull/16759) (feniljain) +- fix: add `order_requirement` & `dist_requirement` to `OutputRequirementExec` display [#16726](https://github.com/apache/datafusion/pull/16726) (Loaki07) +- fix: support nullable columns in pre-sorted data sources [#16783](https://github.com/apache/datafusion/pull/16783) (crepererum) +- fix: The inconsistency between scalar and array on the cast decimal to timestamp [#16539](https://github.com/apache/datafusion/pull/16539) (chenkovsky) +- fix: unit test for object_storage [#16824](https://github.com/apache/datafusion/pull/16824) (chenkovsky) +- fix(docs): Update broken links to `TableProvider` docs [#16830](https://github.com/apache/datafusion/pull/16830) (jcsherin) + +**Documentation updates:** + +- Minor: Add upgrade guide for `Expr::WindowFunction` 
[#16313](https://github.com/apache/datafusion/pull/16313) (alamb) +- Fix `array_position` on empty list [#16292](https://github.com/apache/datafusion/pull/16292) (Blizzara) +- Fix: mark "Spilling (to disk) Joins" as supported in features [#16343](https://github.com/apache/datafusion/pull/16343) (kosiew) +- Fix cp_solver doc formatting [#16352](https://github.com/apache/datafusion/pull/16352) (xudong963) +- docs: Expand `MemoryPool` docs with related structs [#16289](https://github.com/apache/datafusion/pull/16289) (2010YOUY01) +- Support datafusion-cli access to public S3 buckets that do not require authentication [#16300](https://github.com/apache/datafusion/pull/16300) (alamb) +- Document Table Constraint Enforcement Behavior in Custom Table Providers Guide [#16340](https://github.com/apache/datafusion/pull/16340) (kosiew) +- doc: Add SQL examples for SEMI + ANTI Joins [#16316](https://github.com/apache/datafusion/pull/16316) (jonathanc-n) +- [datafusion-spark] Example of using Spark compatible function library [#16384](https://github.com/apache/datafusion/pull/16384) (alamb) +- Add note in upgrade guide about changes to `Expr::Scalar` in 48.0.0 [#16360](https://github.com/apache/datafusion/pull/16360) (alamb) +- Update PMC management instructions to follow new ASF process [#16417](https://github.com/apache/datafusion/pull/16417) (alamb) +- Add design process section to the docs [#16397](https://github.com/apache/datafusion/pull/16397) (alamb) +- Unify Metadata Handing: use `FieldMetadata` in `Expr::Alias` and `ExprSchemable` [#16320](https://github.com/apache/datafusion/pull/16320) (alamb) +- TopK dynamic filter pushdown attempt 2 [#15770](https://github.com/apache/datafusion/pull/15770) (adriangb) +- Update Roadmap documentation [#16399](https://github.com/apache/datafusion/pull/16399) (alamb) +- doc: Add comments to clarify algorithm for `MarkJoin`s [#16436](https://github.com/apache/datafusion/pull/16436) (jonathanc-n) +- Add compression option to 
SpillManager [#16268](https://github.com/apache/datafusion/pull/16268) (ding-young) +- Redirect user defined function webpage [#16475](https://github.com/apache/datafusion/pull/16475) (alamb) +- Use Tokio's task budget consistently, better APIs to support task cancellation [#16398](https://github.com/apache/datafusion/pull/16398) (pepijnve) +- doc: upgrade guide for new compression option for spill files [#16472](https://github.com/apache/datafusion/pull/16472) (2010YOUY01) +- Introduce Async User Defined Functions [#14837](https://github.com/apache/datafusion/pull/14837) (goldmedal) +- Minor: Add more links to cooperative / scheduling docs [#16484](https://github.com/apache/datafusion/pull/16484) (alamb) +- doc: Document DESCRIBE comman in ddl.md [#16524](https://github.com/apache/datafusion/pull/16524) (krikera) +- Add more doc for physical filter pushdown [#16504](https://github.com/apache/datafusion/pull/16504) (xudong963) +- chore: fix CI failures on `ddl.md` [#16526](https://github.com/apache/datafusion/pull/16526) (comphead) +- Add some comments about adding new dependencies in datafusion-sql [#16543](https://github.com/apache/datafusion/pull/16543) (alamb) +- Add note for planning release in Upgrade Guides [#16534](https://github.com/apache/datafusion/pull/16534) (xudong963) +- Consolidate configuration sections in docs [#16544](https://github.com/apache/datafusion/pull/16544) (alamb) +- Minor: add clearer link to the main website from intro paragraph. 
[#16556](https://github.com/apache/datafusion/pull/16556) (alamb) +- Simplify AsyncScalarUdfImpl so it extends ScalarUdfImpl [#16523](https://github.com/apache/datafusion/pull/16523) (alamb) +- docs: Minor grammatical fixes for the scalar UDF docs [#16618](https://github.com/apache/datafusion/pull/16618) (ianthetechie) +- Implementation for regex_instr [#15928](https://github.com/apache/datafusion/pull/15928) (nirnayroy) +- Update Upgrade Guide for 48.0.1 [#16699](https://github.com/apache/datafusion/pull/16699) (alamb) +- ensure MemTable has at least one partition [#16754](https://github.com/apache/datafusion/pull/16754) (waynexia) +- Restore custom SchemaAdapter functionality for Parquet [#16791](https://github.com/apache/datafusion/pull/16791) (adriangb) +- Update `upgrading.md` for new unified config for sql string mapping to utf8view [#16809](https://github.com/apache/datafusion/pull/16809) (zhuqi-lucas) +- docs: Remove reference to forthcoming example (#16817) [#16818](https://github.com/apache/datafusion/pull/16818) (m09526) +- docs: Fix broken links [#16839](https://github.com/apache/datafusion/pull/16839) (2010YOUY01) +- Add note to upgrade guide about MSRV update [#16845](https://github.com/apache/datafusion/pull/16845) (alamb) + +**Other:** + +- chore(deps): bump sqllogictest from 0.28.2 to 0.28.3 [#16286](https://github.com/apache/datafusion/pull/16286) (dependabot[bot]) +- chore(deps-dev): bump webpack-dev-server from 4.15.1 to 5.2.1 in /datafusion/wasmtest/datafusion-wasm-app [#16253](https://github.com/apache/datafusion/pull/16253) (dependabot[bot]) +- Improve DataFusion subcrate readme files [#16263](https://github.com/apache/datafusion/pull/16263) (alamb) +- Fix intermittent SQL logic test failure in limit.slt by adding ORDER BY clause [#16257](https://github.com/apache/datafusion/pull/16257) (kosiew) +- Extend benchmark comparison script with more detailed statistics [#16262](https://github.com/apache/datafusion/pull/16262) (pepijnve) +- 
chore(deps): bump flate2 from 1.1.1 to 1.1.2 [#16338](https://github.com/apache/datafusion/pull/16338) (dependabot[bot]) +- chore(deps): bump petgraph from 0.8.1 to 0.8.2 [#16337](https://github.com/apache/datafusion/pull/16337) (dependabot[bot]) +- chore(deps): bump substrait from 0.56.0 to 0.57.0 [#16143](https://github.com/apache/datafusion/pull/16143) (dependabot[bot]) +- Add test for ordering of predicate pushdown into parquet [#16169](https://github.com/apache/datafusion/pull/16169) (adriangb) +- Fix distinct count for DictionaryArray to correctly account for nulls in values array [#16258](https://github.com/apache/datafusion/pull/16258) (kosiew) +- Fix inconsistent schema projection in ListingTable even when schema is specified [#16305](https://github.com/apache/datafusion/pull/16305) (kosiew) +- tpch: move reading of SQL queries out of timed span. [#16357](https://github.com/apache/datafusion/pull/16357) (pepijnve) +- chore(deps): bump clap from 4.5.39 to 4.5.40 [#16354](https://github.com/apache/datafusion/pull/16354) (dependabot[bot]) +- chore(deps): bump syn from 2.0.101 to 2.0.102 [#16355](https://github.com/apache/datafusion/pull/16355) (dependabot[bot]) +- Encapsulate metadata for literals on to a `FieldMetadata` structure [#16317](https://github.com/apache/datafusion/pull/16317) (alamb) +- Add support `UInt64` and other integer data types for `to_hex` [#16335](https://github.com/apache/datafusion/pull/16335) (tlm365) +- Document `copy_array_data` function with example [#16361](https://github.com/apache/datafusion/pull/16361) (alamb) +- Fix array_agg memory over use [#16346](https://github.com/apache/datafusion/pull/16346) (gabotechs) +- Update publish command [#16377](https://github.com/apache/datafusion/pull/16377) (xudong963) +- Add more context to error message for datafusion-cli config failure [#16379](https://github.com/apache/datafusion/pull/16379) (alamb) +- Fix: datafusion-sqllogictest 48.0.0 can't be published 
[#16376](https://github.com/apache/datafusion/pull/16376) (xudong963) +- bug: remove busy-wait while sort is ongoing [#16322](https://github.com/apache/datafusion/pull/16322) (pepijnve) +- chore: refactor Substrait consumer's "rename_field" and implement the rest of types [#16345](https://github.com/apache/datafusion/pull/16345) (Blizzara) +- chore(deps): bump object_store from 0.12.1 to 0.12.2 [#16368](https://github.com/apache/datafusion/pull/16368) (dependabot[bot]) +- Disable `datafusion-cli` tests for hash_collision tests, fix extended CI [#16382](https://github.com/apache/datafusion/pull/16382) (alamb) +- Fix array_concat with NULL arrays [#16348](https://github.com/apache/datafusion/pull/16348) (alexanderbianchi) +- Minor: add testing case for add YieldStreamExec and polish docs [#16369](https://github.com/apache/datafusion/pull/16369) (zhuqi-lucas) +- chore(deps): bump aws-config from 1.6.3 to 1.8.0 [#16394](https://github.com/apache/datafusion/pull/16394) (dependabot[bot]) +- fix typo in test file name [#16403](https://github.com/apache/datafusion/pull/16403) (adriangb) +- Add topk_tpch benchmark [#16410](https://github.com/apache/datafusion/pull/16410) (Dandandan) +- Reduce some cloning [#16404](https://github.com/apache/datafusion/pull/16404) (simonvandel) +- chore(deps): bump syn from 2.0.102 to 2.0.103 [#16393](https://github.com/apache/datafusion/pull/16393) (dependabot[bot]) +- Simplify expressions passed to table functions [#16388](https://github.com/apache/datafusion/pull/16388) (simonvandel) +- Minor: Clean-up `bench.sh` usage message [#16416](https://github.com/apache/datafusion/pull/16416) (2010YOUY01) +- chore(deps): bump rust_decimal from 1.37.1 to 1.37.2 [#16422](https://github.com/apache/datafusion/pull/16422) (dependabot[bot]) +- Migrate core test to insta, part1 [#16324](https://github.com/apache/datafusion/pull/16324) (Chen-Yuan-Lai) +- chore(deps): bump mimalloc from 0.1.46 to 0.1.47 
[#16426](https://github.com/apache/datafusion/pull/16426) (dependabot[bot]) +- chore(deps): bump libc from 0.2.172 to 0.2.173 [#16421](https://github.com/apache/datafusion/pull/16421) (dependabot[bot]) +- Use dedicated NullEquality enum instead of null_equals_null boolean [#16419](https://github.com/apache/datafusion/pull/16419) (tobixdev) +- chore: generate basic spark function tests [#16409](https://github.com/apache/datafusion/pull/16409) (shehabgamin) +- Fix CI Failure: replace false with NullEqualsNothing [#16437](https://github.com/apache/datafusion/pull/16437) (ding-young) +- chore(deps): bump bzip2 from 0.5.2 to 0.6.0 [#16441](https://github.com/apache/datafusion/pull/16441) (dependabot[bot]) +- chore(deps): bump libc from 0.2.173 to 0.2.174 [#16440](https://github.com/apache/datafusion/pull/16440) (dependabot[bot]) +- Remove redundant license-header-check CI job [#16451](https://github.com/apache/datafusion/pull/16451) (alamb) +- Remove unused feature in `physical-plan` and fix compilation error in benchmark [#16449](https://github.com/apache/datafusion/pull/16449) (AdamGS) +- Temporarily fix bug in dynamic top-k optimization [#16465](https://github.com/apache/datafusion/pull/16465) (AdamGS) +- Ignore `sort_query_fuzzer_runner` [#16462](https://github.com/apache/datafusion/pull/16462) (blaginin) +- Revert "Ignore `sort_query_fuzzer_runner` (#16462)" [#16470](https://github.com/apache/datafusion/pull/16470) (2010YOUY01) +- Reapply "Ignore `sort_query_fuzzer_runner` (#16462)" (#16470) [#16485](https://github.com/apache/datafusion/pull/16485) (alamb) +- Fix constant window for evaluate stateful [#16430](https://github.com/apache/datafusion/pull/16430) (suibianwanwank) +- Use UDTF name in logical plan table scan [#16468](https://github.com/apache/datafusion/pull/16468) (Jeadie) +- refactor reassign_predicate_columns to accept an &Schema instead of &Arc [#16499](https://github.com/apache/datafusion/pull/16499) (adriangb) +- re-enable `sort_query_fuzzer_runner` 
[#16491](https://github.com/apache/datafusion/pull/16491) (adriangb) +- Example for using a separate threadpool for CPU bound work (try 3) [#16331](https://github.com/apache/datafusion/pull/16331) (alamb) +- chore(deps): bump syn from 2.0.103 to 2.0.104 [#16507](https://github.com/apache/datafusion/pull/16507) (dependabot[bot]) +- use 'lit' as the field name for literal values [#16498](https://github.com/apache/datafusion/pull/16498) (adriangb) +- [datafusion-spark] Implement `factorical` function [#16125](https://github.com/apache/datafusion/pull/16125) (tlm365) +- Add DESC alias for DESCRIBE command. [#16514](https://github.com/apache/datafusion/pull/16514) (lucqui) +- Split clickbench query set into one file per query [#16476](https://github.com/apache/datafusion/pull/16476) (pepijnve) +- Support query filter on all benchmarks [#16477](https://github.com/apache/datafusion/pull/16477) (pepijnve) +- `TableProvider` to skip files in the folder which non relevant to selected reader [#16487](https://github.com/apache/datafusion/pull/16487) (comphead) +- Reuse `BaselineMetrics` in `UnnestMetrics` [#16497](https://github.com/apache/datafusion/pull/16497) (hendrikmakait) +- Fix array_has to return false for empty arrays instead of null [#16529](https://github.com/apache/datafusion/pull/16529) (kosiew) +- Minor: Add documentation to `AggregateWindowExpr::get_result_column` [#16479](https://github.com/apache/datafusion/pull/16479) (alamb) +- Fix WindowFrame::new with order_by [#16537](https://github.com/apache/datafusion/pull/16537) (findepi) +- chore(deps): bump object_store from 0.12.1 to 0.12.2 [#16548](https://github.com/apache/datafusion/pull/16548) (dependabot[bot]) +- chore(deps): bump mimalloc from 0.1.46 to 0.1.47 [#16547](https://github.com/apache/datafusion/pull/16547) (dependabot[bot]) +- Add support for Arrow Duration type in Substrait [#16503](https://github.com/apache/datafusion/pull/16503) (jkosh44) +- Allow unparser to override the alias name for the 
specific dialect [#16540](https://github.com/apache/datafusion/pull/16540) (goldmedal) +- Avoid clones when calling find_window_exprs [#16551](https://github.com/apache/datafusion/pull/16551) (findepi) +- Update `spilled_bytes` metric to reflect actual disk usage [#16535](https://github.com/apache/datafusion/pull/16535) (ding-young) +- adapt filter expressions to file schema during parquet scan [#16461](https://github.com/apache/datafusion/pull/16461) (adriangb) +- datafusion-cli: Use correct S3 region if it is not specified [#16502](https://github.com/apache/datafusion/pull/16502) (liamzwbao) +- Add nested struct casting support and integrate into SchemaAdapter [#16371](https://github.com/apache/datafusion/pull/16371) (kosiew) +- Improve err message grammar [#16566](https://github.com/apache/datafusion/pull/16566) (findepi) +- refactor: move PruningPredicate into its own module [#16587](https://github.com/apache/datafusion/pull/16587) (adriangb) +- chore(deps): bump indexmap from 2.9.0 to 2.10.0 [#16582](https://github.com/apache/datafusion/pull/16582) (dependabot[bot]) +- Skip re-pruning based on partition values and file level stats if there are no dynamic filters [#16424](https://github.com/apache/datafusion/pull/16424) (adriangb) +- Support timestamp and date arguments for `range` and `generate_series` table functions [#16552](https://github.com/apache/datafusion/pull/16552) (simonvandel) +- Fix normalization of columns in JOIN ... USING. 
[#16560](https://github.com/apache/datafusion/pull/16560) (brunal) +- Revert Finalize support for `RightMark` join + `Mark` join [#16597](https://github.com/apache/datafusion/pull/16597) (comphead) +- move min_batch/max_batch to functions-aggregate-common [#16593](https://github.com/apache/datafusion/pull/16593) (adriangb) +- Allow usage of table functions in relations [#16571](https://github.com/apache/datafusion/pull/16571) (osipovartem) +- Update to arrow/parquet 55.2.0 [#16575](https://github.com/apache/datafusion/pull/16575) (alamb) +- Improve field naming in first_value, last_value implementation [#16631](https://github.com/apache/datafusion/pull/16631) (findepi) +- Fix spurious failure in convert_batches test helper [#16627](https://github.com/apache/datafusion/pull/16627) (findepi) +- Aggregate UDF cleanup [#16628](https://github.com/apache/datafusion/pull/16628) (findepi) +- Avoid treating incomparable scalars as equal [#16624](https://github.com/apache/datafusion/pull/16624) (findepi) +- restore topk pre-filtering of batches and make sort query fuzzer less sensitive to expected non determinism [#16501](https://github.com/apache/datafusion/pull/16501) (alamb) +- Add support for Arrow Time types in Substrait [#16558](https://github.com/apache/datafusion/pull/16558) (jkosh44) +- chore(deps): bump substrait from 0.57.0 to 0.58.0 [#16640](https://github.com/apache/datafusion/pull/16640) (dependabot[bot]) +- Support explain tree format debug for benchmark debug [#16604](https://github.com/apache/datafusion/pull/16604) (zhuqi-lucas) +- Add microbenchmark for spilling with compression [#16512](https://github.com/apache/datafusion/pull/16512) (ding-young) +- Fix parquet filter_pushdown: respect parquet filter pushdown config in scan [#16646](https://github.com/apache/datafusion/pull/16646) (adriangb) +- chore(deps): bump aws-config from 1.8.0 to 1.8.1 [#16651](https://github.com/apache/datafusion/pull/16651) (dependabot[bot]) +- Migrate core test to insta, part 2 
[#16617](https://github.com/apache/datafusion/pull/16617) (Chen-Yuan-Lai) +- Update all spark SLT files [#16637](https://github.com/apache/datafusion/pull/16637) (findepi) +- Add PhysicalExpr optimizer and cast unwrapping [#16530](https://github.com/apache/datafusion/pull/16530) (adriangb) +- benchmark: Support sort_tpch10 for benchmark [#16671](https://github.com/apache/datafusion/pull/16671) (zhuqi-lucas) +- chore(deps): bump tokio from 1.45.1 to 1.46.0 [#16666](https://github.com/apache/datafusion/pull/16666) (dependabot[bot]) +- Fix TopK Sort incorrectly pushed down past Join with anti join [#16641](https://github.com/apache/datafusion/pull/16641) (zhuqi-lucas) +- Improve error message when ScalarValue fails to cast array [#16670](https://github.com/apache/datafusion/pull/16670) (findepi) +- Add an example of embedding indexes inside a parquet file [#16395](https://github.com/apache/datafusion/pull/16395) (zhuqi-lucas) +- `datafusion-cli`: Refactor statement execution logic [#16634](https://github.com/apache/datafusion/pull/16634) (liamzwbao) +- Add SchemaAdapterFactory Support for ListingTable with Schema Evolution and Mapping [#16583](https://github.com/apache/datafusion/pull/16583) (kosiew) +- Perf: fast CursorValues compare for StringViewArray using inline*key*… [#16630](https://github.com/apache/datafusion/pull/16630) (zhuqi-lucas) +- Update to Rust 1.88 [#16663](https://github.com/apache/datafusion/pull/16663) (melroy12) +- Refactor StreamJoinMetrics to reuse BaselineMetrics [#16674](https://github.com/apache/datafusion/pull/16674) (Standing-Man) +- chore: refactor `BuildProbeJoinMetrics` to use `BaselineMetrics` [#16500](https://github.com/apache/datafusion/pull/16500) (Samyak2) +- Use compression type in CSV file suffices [#16609](https://github.com/apache/datafusion/pull/16609) (theirix) +- Clarify the generality of the embedded parquet index [#16692](https://github.com/apache/datafusion/pull/16692) (alamb) +- Refactor SortMergeJoinMetrics to reuse 
BaselineMetrics [#16675](https://github.com/apache/datafusion/pull/16675) (Standing-Man) +- Add support for Arrow Dictionary type in Substrait [#16608](https://github.com/apache/datafusion/pull/16608) (jkosh44) +- Fix duplicate field name error in Join::try_new_with_project_input during physical planning [#16454](https://github.com/apache/datafusion/pull/16454) (LiaCastaneda) +- chore(deps): bump tokio from 1.46.0 to 1.46.1 [#16700](https://github.com/apache/datafusion/pull/16700) (dependabot[bot]) +- Add reproducer for tpch Q16 deserialization bug [#16662](https://github.com/apache/datafusion/pull/16662) (NGA-TRAN) +- Minor: Update release instructions [#16701](https://github.com/apache/datafusion/pull/16701) (alamb) +- refactor filter pushdown APIs [#16642](https://github.com/apache/datafusion/pull/16642) (adriangb) +- Add comments to ClickBench queries about setting binary_as_string [#16605](https://github.com/apache/datafusion/pull/16605) (alamb) +- minor: improve display output for FFI execution plans [#16713](https://github.com/apache/datafusion/pull/16713) (timsaucer) +- Revert "fix: create file for empty stream" [#16682](https://github.com/apache/datafusion/pull/16682) (brunal) +- Add the missing equivalence info for filter pushdown [#16686](https://github.com/apache/datafusion/pull/16686) (liamzwbao) +- Fix sqllogictests test running compatibility (ignore `--test-threads`) [#16694](https://github.com/apache/datafusion/pull/16694) (mjgarton) +- Fix: Make `CopyTo` logical plan output schema consistent with physical schema [#16705](https://github.com/apache/datafusion/pull/16705) (bert-beyondloops) +- chore(devcontainer): use debian's `protobuf-compiler` package [#16687](https://github.com/apache/datafusion/pull/16687) (fvj) +- Add link to upgrade guide in changelog script [#16680](https://github.com/apache/datafusion/pull/16680) (alamb) +- Improve display format of BoundedWindowAggExec [#16645](https://github.com/apache/datafusion/pull/16645) 
(geetanshjuneja) +- Fix: optimize projections for unnest logical plan. [#16632](https://github.com/apache/datafusion/pull/16632) (bert-beyondloops) +- Use the `test-threads` option in sqllogictests [#16722](https://github.com/apache/datafusion/pull/16722) (mjgarton) +- chore(deps): bump clap from 4.5.40 to 4.5.41 [#16735](https://github.com/apache/datafusion/pull/16735) (dependabot[bot]) +- chore: make more clarity for internal errors [#16741](https://github.com/apache/datafusion/pull/16741) (comphead) +- Remove parquet_filter and parquet `sort` benchmarks [#16730](https://github.com/apache/datafusion/pull/16730) (alamb) +- Perform type coercion for corr aggregate function [#15776](https://github.com/apache/datafusion/pull/15776) (kumarlokesh) +- Improve dictionary null handling in hashing and expand aggregate test coverage for nulls [#16466](https://github.com/apache/datafusion/pull/16466) (kosiew) +- Improve Ci cache [#16709](https://github.com/apache/datafusion/pull/16709) (blaginin) +- Fix in list round trip in df proto [#16744](https://github.com/apache/datafusion/pull/16744) (XiangpengHao) +- chore: Make `GroupValues` and APIs on `PhysicalGroupBy` aggregation APIs public [#16733](https://github.com/apache/datafusion/pull/16733) (haohuaijin) +- Extend binary coercion rules to support Decimal arithmetic operations with integer(signed and unsigned) types [#16668](https://github.com/apache/datafusion/pull/16668) (jatin510) +- Support Type Coercion for NULL in Binary Arithmetic Expressions [#16761](https://github.com/apache/datafusion/pull/16761) (kosiew) +- chore(deps): bump chrono-tz from 0.10.3 to 0.10.4 [#16769](https://github.com/apache/datafusion/pull/16769) (dependabot[bot]) +- limit intermediate batch size in nested_loop_join [#16443](https://github.com/apache/datafusion/pull/16443) (UBarney) +- Add serialization/deserialization and round-trip tests for all tpc-h queries [#16742](https://github.com/apache/datafusion/pull/16742) (NGA-TRAN) +- Auto start 
testcontainers for `datafusion-cli` [#16644](https://github.com/apache/datafusion/pull/16644) (blaginin) +- Refactor BinaryTypeCoercer to Handle Null Coercion Early and Avoid Redundant Checks [#16768](https://github.com/apache/datafusion/pull/16768) (kosiew) +- Remove fixed version from MSRV check [#16786](https://github.com/apache/datafusion/pull/16786) (findepi) +- Add `clickbench_pushdown` benchmark [#16731](https://github.com/apache/datafusion/pull/16731) (alamb) +- add filter to handle backtrace [#16752](https://github.com/apache/datafusion/pull/16752) (geetanshjuneja) +- Support min/max aggregates for FixedSizeBinary type [#16765](https://github.com/apache/datafusion/pull/16765) (theirix) +- fix tests in page_pruning when filter pushdown is enabled by default [#16794](https://github.com/apache/datafusion/pull/16794) (XiangpengHao) +- Automatically split large single RecordBatches in `MemorySource` into smaller batches [#16734](https://github.com/apache/datafusion/pull/16734) (kosiew) +- CI: Fix slow join test [#16796](https://github.com/apache/datafusion/pull/16796) (2010YOUY01) +- Benchmark for char expression [#16743](https://github.com/apache/datafusion/pull/16743) (ajita-asthana) +- Add example of custom file schema casting rules [#16803](https://github.com/apache/datafusion/pull/16803) (adriangb) +- Fix discrepancy in Float64 to timestamp(9) casts for constants [#16639](https://github.com/apache/datafusion/pull/16639) (findepi) +- Fix: Preserve sorting for the COPY TO plan [#16785](https://github.com/apache/datafusion/pull/16785) (bert-beyondloops) +- chore(deps): bump object_store from 0.12.2 to 0.12.3 [#16807](https://github.com/apache/datafusion/pull/16807) (dependabot[bot]) +- Implement equals for stateful functions [#16781](https://github.com/apache/datafusion/pull/16781) (findepi) +- benchmark: Add parquet h2o support [#16804](https://github.com/apache/datafusion/pull/16804) (zhuqi-lucas) +- chore: use `equals_datatype` for `BinaryExpr` 
[#16813](https://github.com/apache/datafusion/pull/16813) (comphead) +- chore: add tests for out of bounds for NullArray [#16802](https://github.com/apache/datafusion/pull/16802) (comphead) +- Refactor binary.rs tests into modular submodules under `binary/tests` [#16782](https://github.com/apache/datafusion/pull/16782) (kosiew) +- cache generation of dictionary keys and null arrays for ScalarValue [#16789](https://github.com/apache/datafusion/pull/16789) (adriangb) +- refactor(examples): remove redundant call to create directory in `parquet_embedded_index.rs` [#16825](https://github.com/apache/datafusion/pull/16825) (jcsherin) +- Add benchmark for ByteViewGroupValueBuilder [#16826](https://github.com/apache/datafusion/pull/16826) (zhuqi-lucas) +- Simplify try cast expr evaluation [#16834](https://github.com/apache/datafusion/pull/16834) (lewiszlw) +- Fix flaky test case in joins.slt [#16849](https://github.com/apache/datafusion/pull/16849) (findepi) +- chore(deps): bump sysinfo from 0.35.2 to 0.36.1 [#16850](https://github.com/apache/datafusion/pull/16850) (dependabot[bot]) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 33 Andrew Lamb + 26 dependabot[bot] + 19 Adrian Garcia Badaracco + 14 kosiew + 13 Piotr Findeisen + 13 Qi Zhu + 7 Jonathan Chen + 6 Chen Chongchen + 6 Marco Neumann + 6 Oleks V + 6 Pepijn Van Eeckhoudt + 6 xudong.w + 5 Yongting You + 5 ding-young + 4 Simon Vandel Sillesen + 3 Adam Gutglick + 3 Bert Vermeiren + 3 Dmitrii Blaginin + 3 Joseph Koshakow + 3 Liam Bao + 3 Tim Saucer + 2 Alan Tang + 2 Arttu + 2 Bruno + 2 Corwin Joy + 2 Daniël Heres + 2 Geetansh Juneja + 2 Ian Lai + 2 Jax Liu + 2 Martin Garton + 2 Nga Tran + 2 Ruihang Xia + 2 Tai Le Manh + 2 ViggoC + 2 Xiangpeng Hao + 2 haiywu + 2 theirix + 1 Ajeeta Asthana + 1 Artem Osipov + 1 Dharan Aditya + 1 Gabriel + 1 Geoffrey Claude + 1 Hendrik Makait + 1 Huaijin + 1 Ian Wagner + 1 Jack Eadie + 1 Jagdish Parihar + 1 Jon Mease + 1 Julius von Froreich + 1 K + 1 Leon Lin + 1 Loakesh Indiran + 1 Lokesh + 1 Lucas Earl + 1 Lía Adriana + 1 Mehmet Ozan Kabak + 1 Melroy dsilva + 1 Nirnay Roy + 1 Nuno Faria + 1 R. Tyler Croy + 1 Rishab Joshi + 1 Sami Tabet + 1 Samyak Sarnayak + 1 Shehab Amin + 1 Tobias Schwarzinger + 1 UBarney + 1 alexanderbianchi + 1 epgif + 1 feniljain + 1 m09526 + 1 suibianwanwan +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
diff --git a/dev/release/README.md b/dev/release/README.md index f1b0d286e8953..862865f75f1b6 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -278,8 +278,8 @@ Verify that the Cargo.toml in the tarball contains the correct version (cd datafusion/optimizer && cargo publish) (cd datafusion/common-runtime && cargo publish) (cd datafusion/physical-plan && cargo publish) -(cd datafusion/session && cargo publish) (cd datafusion/physical-optimizer && cargo publish) +(cd datafusion/session && cargo publish) (cd datafusion/datasource && cargo publish) (cd datafusion/catalog && cargo publish) (cd datafusion/catalog-listing && cargo publish) diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 1349416bcaa59..830d329f73c4f 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -124,6 +124,9 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): print(f"This release consists of {commit_count} commits from {contributor_count} contributors. 
" f"See credits at the end of this changelog for more information.\n") + print("See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) " + "for information on how to upgrade from previous versions.\n") + print_pulls(repo_name, "Breaking changes", breaking) print_pulls(repo_name, "Performance related", performance) print_pulls(repo_name, "Implemented enhancements", enhancements) diff --git a/docs/source/index.rst b/docs/source/index.rst index 01f39bcb7c2e0..a22c109655a54 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -126,6 +126,7 @@ To get started, see :caption: Library User Guide library-user-guide/index + library-user-guide/upgrading library-user-guide/extensions library-user-guide/using-the-sql-api library-user-guide/working-with-exprs @@ -138,7 +139,6 @@ To get started, see library-user-guide/extending-operators library-user-guide/profiling library-user-guide/query-optimizer - library-user-guide/upgrading .. .. _toc.contributor-guide: diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index e1e75b3e4bdbd..05b1ae3cddc38 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -181,7 +181,7 @@ async fn main() -> Result<(), DataFusionError> { // TableProvider. For this example, we don't provide any data // but in production code, this would have `RecordBatch`es with // in memory data - let table_provider = Arc::new(MemTable::try_new(Arc::new(schema), vec![])?); + let table_provider = Arc::new(MemTable::try_new(Arc::new(schema), vec![vec![]])?); // Use the provider_as_source function to convert the TableProvider to a table source let table_source = provider_as_source(table_provider); @@ -220,7 +220,7 @@ However, it is more common to use a [TableProvider]. 
To get a [TableSource] from [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html [provider_as_source]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/fn.provider_as_source.html -[tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html +[tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/trait.TableProvider.html [tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html [`executionplan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html [`sessionstate::create_physical_plan`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.create_physical_plan diff --git a/docs/source/library-user-guide/functions/adding-udfs.md b/docs/source/library-user-guide/functions/adding-udfs.md index cf5624f68d04f..da9b6e37a6445 100644 --- a/docs/source/library-user-guide/functions/adding-udfs.md +++ b/docs/source/library-user-guide/functions/adding-udfs.md @@ -23,13 +23,22 @@ User Defined Functions (UDFs) are functions that can be used in the context of D This page covers how to add UDFs to DataFusion. In particular, it covers how to add Scalar, Window, and Aggregate UDFs. -| UDF Type | Description | Example | -| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------- | -| Scalar | A function that takes a row of data and returns a single value. | [simple_udf.rs][1] | -| Window | A function that takes a row of data and returns a single value, but also has access to the rows around it. 
| [simple_udwf.rs][2] | -| Aggregate | A function that takes a group of rows and returns a single value. | [simple_udaf.rs][3] | -| Table | A function that takes parameters and returns a `TableProvider` to be used in an query plan. | [simple_udtf.rs][4] | -| Async Scalar | A scalar function that natively supports asynchronous execution, allowing you to perform async operations (such as network or I/O calls) within the UDF. | [async_udf.rs][5] | +| UDF Type | Description | Example(s) | +| -------------- | ---------------------------------------------------------------------------------------------------------- | ------------------------------------- | +| Scalar | A function that takes a row of data and returns a single value. | [simple_udf.rs] / [advanced_udf.rs] | +| Window | A function that takes a row of data and returns a single value, but also has access to the rows around it. | [simple_udwf.rs] / [advanced_udwf.rs] | +| Aggregate | A function that takes a group of rows and returns a single value. | [simple_udaf.rs] / [advanced_udaf.rs] | +| Table | A function that takes parameters and returns a `TableProvider` to be used in a query plan. | [simple_udtf.rs] | +| Scalar (async) | A scalar function for performing `async` operations (such as network or I/O calls) within the UDF.
| [async_udf.rs] | + +[simple_udf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs +[advanced_udf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs +[simple_udwf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs +[advanced_udwf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs +[simple_udaf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs +[advanced_udaf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs +[simple_udtf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udtf.rs +[async_udf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/async_udf.rs First we'll talk about adding an Scalar UDF end-to-end, then we'll talk about the differences between the different types of UDFs. @@ -345,9 +354,9 @@ async fn main() { } ``` -## Adding a Scalar Async UDF +## Adding an Async Scalar UDF -A Scalar Async UDF allows you to implement user-defined functions that support +An Async Scalar UDF allows you to implement user-defined functions that support asynchronous execution, such as performing network or I/O operations within the UDF.
@@ -359,22 +368,21 @@ To add a Scalar Async UDF, you need to: ### Adding by `impl AsyncScalarUDFImpl` ```rust -use arrow::array::{ArrayIter, ArrayRef, AsArray, StringArray}; -use arrow_schema::DataType; -use async_trait::async_trait; -use datafusion::common::error::Result; -use datafusion::common::{internal_err, not_impl_err}; -use datafusion::common::types::logical_string; -use datafusion::config::ConfigOptions; -use datafusion_expr::ScalarUDFImpl; -use datafusion::logical_expr::async_udf::AsyncScalarUDFImpl; -use datafusion::logical_expr::{ - ColumnarValue, Signature, TypeSignature, TypeSignatureClass, Volatility, ScalarFunctionArgs -}; -use datafusion::logical_expr_common::signature::Coercion; -use log::trace; -use std::any::Any; -use std::sync::Arc; +# use arrow::array::{ArrayIter, ArrayRef, AsArray, StringArray}; +# use arrow_schema::DataType; +# use async_trait::async_trait; +# use datafusion::common::error::Result; +# use datafusion::common::{internal_err, not_impl_err}; +# use datafusion::common::types::logical_string; +# use datafusion::config::ConfigOptions; +# use datafusion_expr::ScalarUDFImpl; +# use datafusion::logical_expr::async_udf::AsyncScalarUDFImpl; +# use datafusion::logical_expr::{ +# ColumnarValue, Signature, TypeSignature, TypeSignatureClass, Volatility, ScalarFunctionArgs +# }; +# use datafusion::logical_expr_common::signature::Coercion; +# use std::any::Any; +# use std::sync::Arc; #[derive(Debug)] pub struct AsyncUpper { @@ -419,6 +427,7 @@ impl ScalarUDFImpl for AsyncUpper { Ok(DataType::Utf8) } + // Note the normal invoke_with_args method is not called for Async UDFs fn invoke_with_args( &self, _args: ScalarFunctionArgs, @@ -434,13 +443,16 @@ impl AsyncScalarUDFImpl for AsyncUpper { Some(10) } + /// This method is called to execute the async UDF and is similar + /// to the normal `invoke_with_args` except it returns an `ArrayRef` + /// instead of `ColumnarValue` and is `async`. 
async fn invoke_async_with_args( &self, args: ScalarFunctionArgs, - _option: &ConfigOptions, ) -> Result { - trace!("Invoking async_upper with args: {:?}", args); let value = &args.args[0]; + // This function simply implements a simple string to uppercase conversion + // but can be used for any async operation such as network calls. let result = match value { ColumnarValue::Array(array) => { let string_array = array.as_string::(); @@ -536,7 +548,6 @@ We can now transfer the async UDF into the normal scalar using `into_scalar_udf` # async fn invoke_async_with_args( # &self, # args: ScalarFunctionArgs, -# _option: &ConfigOptions, # ) -> Result { # trace!("Invoking async_upper with args: {:?}", args); # let value = &args.args[0]; diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 499e7b14304ec..6bae22f6916f9 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -24,24 +24,53 @@ **Note:** DataFusion `49.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version. You can see the current [status of the `49.0.0 `release here](https://github.com/apache/datafusion/issues/16235) -### `datafusion.execution.collect_statistics` now defaults to `true` +### `MSRV` updated to 1.85.1 -The default value of the `datafusion.execution.collect_statistics` configuration -setting is now true. This change impacts users that use that value directly and relied -on its default value being `false`. +The Minimum Supported Rust Version (MSRV) has been updated to [`1.85.1`]. See +[#16728] for details. -This change also restores the default behavior of `ListingTable` to its previous. If you use it directly -you can maintain the current behavior by overriding the default value in your code. 
+[`1.85.1`]: https://releases.rs/docs/1.85.1/ +[#16728]: https://github.com/apache/datafusion/pull/16728 + +### `DataFusionError` variants are now `Box`ed + +To reduce the size of `DataFusionError`, several variants that were previously stored inline are now `Box`ed. This reduces the size of `Result` and thus stack usage and async state machine size. Please see [#16652] for more details. + +The following variants of `DataFusionError` are now boxed: + +- `ArrowError` +- `SQL` +- `SchemaError` + +This is a breaking change. Code that constructs or matches on these variants will need to be updated. + +For example, to create a `SchemaError`, instead of: ```rust # /* comment to avoid running -ListingOptions::new(Arc::new(ParquetFormat::default())) - .with_collect_stat(false) - // other options +use datafusion_common::{DataFusionError, SchemaError}; +DataFusionError::SchemaError( + SchemaError::DuplicateUnqualifiedField { name: "foo".to_string() }, + Box::new(None) +) +# */ +``` + +You now need to `Box` the inner error: + +```rust +# /* comment to avoid running +use datafusion_common::{DataFusionError, SchemaError}; +DataFusionError::SchemaError( + Box::new(SchemaError::DuplicateUnqualifiedField { name: "foo".to_string() }), + Box::new(None) +) # */ ``` -### Metadata is now represented by `FieldMetadata` +[#16652]: https://github.com/apache/datafusion/issues/16652 + +### Metadata on Arrow Types is now represented by `FieldMetadata` Metadata from the Arrow `Field` is now stored using the `FieldMetadata` structure. In prior versions it was stored as both a `HashMap` @@ -99,6 +128,92 @@ SET datafusion.execution.spill_compression = 'zstd'; For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../user-guide/configs.md) documentation. 
+### Deprecated `map_varchar_to_utf8view` configuration option + +See [issue #16290](https://github.com/apache/datafusion/pull/16290) for more information. +The old configuration + +```text +datafusion.sql_parser.map_varchar_to_utf8view +``` + +is now **deprecated** in favor of the unified option below.\ +If you previously used this to control only `VARCHAR`→`Utf8View` mapping, please migrate to `map_string_types_to_utf8view`. + +--- + +### New `map_string_types_to_utf8view` configuration option + +To unify **all** SQL string types (`CHAR`, `VARCHAR`, `TEXT`, `STRING`) to Arrow’s zero‑copy `Utf8View`, DataFusion 49.0.0 introduces: + +- **Key**: `datafusion.sql_parser.map_string_types_to_utf8view` +- **Default**: `true` + +**Description:** + +- When **true** (default), **all** SQL string types are mapped to `Utf8View`, avoiding full‑copy UTF‑8 allocations and improving performance. +- When **false**, DataFusion falls back to the legacy `Utf8` mapping for **all** string types. + +#### Examples + +```rust +# /* comment to avoid running +// Disable Utf8View mapping for all SQL string types +let opts = datafusion::sql::planner::ParserOptions::new() + .with_map_string_types_to_utf8view(false); + +// Verify the setting is applied +assert!(!opts.map_string_types_to_utf8view); +# */ +``` + +--- + +```sql +-- Disable Utf8View mapping globally +SET datafusion.sql_parser.map_string_types_to_utf8view = false; + +-- Now VARCHAR, CHAR, TEXT, STRING all use Utf8 rather than Utf8View +CREATE TABLE my_table (a VARCHAR, b TEXT, c STRING); +DESCRIBE my_table; +``` + +### Deprecating `SchemaAdapterFactory` and `SchemaAdapter` + +We are moving away from converting data (using `SchemaAdapter`) to converting the expressions themselves (which is more efficient and flexible). + +See [issue #16800](https://github.com/apache/datafusion/issues/16800) for more information. +The first place this change has taken place is in predicate pushdown for Parquet.
+By default if you do not use a custom `SchemaAdapterFactory` we will use expression conversion instead. +If you do set a custom `SchemaAdapterFactory` we will continue to use it but emit a warning about that code path being deprecated. + +To resolve this you need to implement a custom `PhysicalExprAdapterFactory` and use that instead of a `SchemaAdapterFactory`. +See the [default values](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/default_column_values.rs) for an example of how to do this. +Opting into the new APIs will set you up for future changes since we plan to expand use of `PhysicalExprAdapterFactory` to other areas of DataFusion. + +See [#16800] for details. + +[#16800]: https://github.com/apache/datafusion/issues/16800 + +## DataFusion `48.0.1` + +### `datafusion.execution.collect_statistics` now defaults to `true` + +The default value of the `datafusion.execution.collect_statistics` configuration +setting is now true. This change impacts users that use that value directly and relied +on its default value being `false`. + +This change also restores the default behavior of `ListingTable` to its previous. If you use it directly +you can maintain the current behavior by overriding the default value in your code. + +```rust +# /* comment to avoid running +ListingOptions::new(Arc::new(ParquetFormat::default())) + .with_collect_stat(false) + // other options +# */ +``` + ## DataFusion `48.0.0` ### `Expr::Literal` has optional metadata diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index c618aa18c2318..96b7ee672bdb6 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -70,7 +70,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 48.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 49.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | NULL | (writing) Sets statictics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | @@ -127,6 +127,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_sizes | true | When set to true, the explain statement will print the partition sizes | | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.explain.format | indent | Display format of explain. Default is "indent". When set to "tree", it will print the plan in a tree-rendered format. | +| datafusion.explain.tree_maximum_render_width | 240 | (format=tree only) Maximum total width of the rendered tree. When set to 0, the tree will have no width limit. 
| | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | | datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. | diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index eb4b86e4b4865..7b4bb71d1c598 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1793,6 +1793,7 @@ regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax) The following regular expression functions are supported: - [regexp_count](#regexp_count) +- [regexp_instr](#regexp_instr) - [regexp_like](#regexp_like) - [regexp_match](#regexp_match) - [regexp_replace](#regexp_replace) @@ -1828,6 +1829,39 @@ regexp_count(str, regexp[, start, flags]) +---------------------------------------------------------------+ ``` +### `regexp_instr` + +Returns the position in a string where the specified occurrence of a POSIX regular expression is located. + +```sql +regexp_instr(str, regexp[, start[, N[, flags[, subexpr]]]]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function. Defaults to 1. +- **N**: Optional. The N-th occurrence of pattern to find.
Defaults to 1 (first match). Can be a constant, column, or function. +- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*? +- **subexpr**: Optional Specifies which capture group (subexpression) to return the position for. Defaults to 0, which returns the position of the entire match. + +#### Example + +```sql +> SELECT regexp_instr('ABCDEF', 'C(.)(..)'); ++---------------------------------------------------------------+ +| regexp_instr(Utf8("ABCDEF"),Utf8("C(.)(..)")) | ++---------------------------------------------------------------+ +| 3 | ++---------------------------------------------------------------+ +``` + ### `regexp_like` Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise. diff --git a/docs/source/user-guide/sql/special_functions.md b/docs/source/user-guide/sql/special_functions.md index 7c9efbb66218f..4f2a39f642b06 100644 --- a/docs/source/user-guide/sql/special_functions.md +++ b/docs/source/user-guide/sql/special_functions.md @@ -69,6 +69,7 @@ Expands an array or map into rows. ### `unnest (struct)` Expand a struct fields into individual columns. +Each field of the struct will be prefixed with `__unnest_placeholder` and could be accessed via `"__unnest_placeholder()."`. #### Arguments @@ -91,10 +92,10 @@ Expand a struct fields into individual columns. 
+---------------------------+ > select unnest(struct_column) from foov; -+------------------------------------------+------------------------------------------+ -| unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b | -+------------------------------------------+------------------------------------------+ -| 5 | a string | -| 6 | another string | -+------------------------------------------+------------------------------------------+ ++--------------------------------------------+--------------------------------------------+ +| __unnest_placeholder(foov.struct_column).a | __unnest_placeholder(foov.struct_column).b | ++--------------------------------------------+--------------------------------------------+ +| 5 | a string | +| 6 | another string | ++--------------------------------------------+--------------------------------------------+ ``` diff --git a/rust-toolchain.toml b/rust-toolchain.toml index c52dd7322d9a3..f772c0987b760 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -19,5 +19,5 @@ # to compile this workspace and run CI jobs. [toolchain] -channel = "1.87.0" +channel = "1.88.0" components = ["rustfmt", "clippy"] diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 811102cf6dbdb..3a161d5f4d645 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -27,7 +27,7 @@ workspace = true [dependencies] arrow = { workspace = true } -chrono-tz = { version = "0.10.3", default-features = false } +chrono-tz = { version = "0.10.4", default-features = false } datafusion-common = { workspace = true, default-features = true } env_logger = { workspace = true } rand = { workspace = true }