diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml deleted file mode 100644 index 43eff95f422..00000000000 --- a/.github/workflows/labels.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: PR Labels - -on: - pull_request: - types: [opened, reopened, synchronize, labeled, unlabeled] # Trigger on these PR activities - -jobs: - check_changelog_label: - name: Validate Changelog Label - runs-on: ubuntu-latest - permissions: - pull-requests: read # Grant permission to read PR information - steps: - - name: Get PR Labels from API - id: get_labels_api - uses: octokit/request-action@v2.4.0 # Use an action to make API requests - with: - route: GET /repos/{owner}/{repo}/issues/{pull_number}/labels - owner: ${{ github.repository_owner }} - repo: ${{ github.event.repository.name }} - pull_number: ${{ github.event.pull_request.number }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Automatically provided token - - - name: Extract and Check Labels - env: - API_RESPONSE: ${{ steps.get_labels_api.outputs.data }} - run: | - REQUIRED_LABELS=( - "chore" - "bug" - "feature" - "fix" - "performance" - "break" - "wire-break" - ) - REQUIRED_LABELS_JSON=$(jq -n '$ARGS.positional' --args "${REQUIRED_LABELS[@]}") - echo "Required Labels: $REQUIRED_LABELS_JSON" - - # Parse the response from the API call - # The API returns an array of label objects, we need just the 'name' property - echo "API Response: $API_RESPONSE" - - # Extract only the label names into a JSON array - CURRENT_PR_LABELS_JSON=$(echo "$API_RESPONSE" | jq '[.[] | .name]') - echo "Current PR Labels from API: $CURRENT_PR_LABELS_JSON" - - found_one=false - for label in "${REQUIRED_LABELS[@]}"; do - if echo "$CURRENT_PR_LABELS_JSON" | jq -e --arg label "$label" 'contains([$label])'; then - echo "Found required label: $label" - found_one=true - break - fi - done - - if [ "$found_one" = false ]; then - echo "::error file=.github/workflows/pr-labels.yml::Pull Request is missing a required changelog label. Please add one of: ${REQUIRED_LABELS[*]}." - exit 1 - else - echo "Pull Request has a required changelog label." - fi diff --git a/Cargo.lock b/Cargo.lock index 5d4191004e6..118f6b5f2cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -196,9 +196,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +checksum = "c26b57282a08ae92f727497805122fec964c6245cfa0e13f0e75452eaf3bc41f" dependencies = [ "arrow-arith", "arrow-array", @@ -217,9 +217,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +checksum = "cebf38ca279120ff522f4954b81a39527425b6e9f615e6b72842f4de1ffe02b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -231,9 +231,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +checksum = "744109142cdf8e7b02795e240e20756c2a782ac9180d4992802954a8f871c0de" dependencies = [ "ahash", "arrow-buffer", @@ -248,9 +248,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +checksum = "601bb103c4c374bcd1f62c66bcea67b42a2ee91a690486c37d4c180236f11ccc" dependencies = [ "bytes", "half", @@ -259,9 +259,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +checksum = "eed61d9d73eda8df9e3014843def37af3050b5080a9acbe108f045a316d5a0be" dependencies = [ "arrow-array", "arrow-buffer", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" +checksum = "fa95b96ce0c06b4d33ac958370db8c0d31e88e54f9d6e08b0353d18374d9f991" dependencies = [ "arrow-array", "arrow-cast", @@ -295,9 +295,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +checksum = "43407f2c6ba2367f64d85d4603d6fb9c4b92ed79d2ffd21021b37efa96523e12" dependencies = [ "arrow-buffer", "arrow-schema", @@ -307,24 +307,24 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +checksum = "e4b0487c4d2ad121cbc42c4db204f1509f8618e589bc77e635e9c40b502e3b90" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", + "arrow-select", "flatbuffers", "lz4_flex", - "zstd", ] [[package]] name = "arrow-json" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +checksum = "26d747573390905905a2dc4c5a61a96163fe2750457f90a04ee2a88680758c79" dependencies = [ "arrow-array", "arrow-buffer", @@ -344,9 +344,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +checksum = "c142a147dceb59d057bad82400f1693847c80dca870d008bf7b91caf902810ae" dependencies = [ "arrow-array", "arrow-buffer", @@ -357,9 +357,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +checksum = "dac6620667fccdab4204689ca173bd84a15de6bb6b756c3a8764d4d7d0c2fc04" dependencies = [ "arrow-array", "arrow-buffer", @@ -370,9 +370,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +checksum = "dfa93af9ff2bb80de539e6eb2c1c8764abd0f4b73ffb0d7c82bf1f9868785e66" dependencies = [ "bitflags", "serde", @@ -381,9 +381,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +checksum = "be8b2e0052cd20d36d64f32640b68a5ab54d805d24a473baee5d52017c85536c" dependencies = [ "ahash", "arrow-array", @@ -395,9 +395,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +checksum = "c2155e26e17f053c8975c546fc70cf19c00542f9abf43c23a88a46ef7204204f" dependencies = [ "arrow-array", "arrow-buffer", @@ -1591,8 +1591,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "arrow-ipc", @@ -1618,6 +1618,7 @@ dependencies = [ "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", @@ -1640,8 +1641,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1665,8 +1666,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1687,8 +1688,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -1710,8 +1711,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "futures", "log", @@ -1720,8 +1721,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1732,6 +1733,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1749,8 +1751,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1773,8 +1775,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1797,8 +1799,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1811,6 +1813,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions-aggregate", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", @@ -1828,15 +1831,16 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" [[package]] name = "datafusion-execution" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", + "async-trait", "dashmap", "datafusion-common", "datafusion-expr", @@ -1851,8 +1855,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1871,8 +1875,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "datafusion-common", @@ -1883,8 +1887,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "arrow-buffer", @@ -1907,8 +1911,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -1927,8 +1931,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -1939,8 +1943,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "arrow-ord", @@ -1960,8 +1964,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -1975,8 +1979,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "datafusion-common", @@ -1992,8 +1996,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2001,8 +2005,8 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "datafusion-expr", "quote", @@ -2011,8 +2015,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "chrono", @@ -2029,8 +2033,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -2044,14 +2048,29 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "parking_lot", "paste", "petgraph 0.8.2", ] +[[package]] +name = "datafusion-physical-expr-adapter" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "itertools 0.14.0", +] + [[package]] name = "datafusion-physical-expr-common" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -2063,8 +2082,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "datafusion-common", @@ -2081,8 +2100,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "ahash", "arrow", @@ -2094,6 +2113,7 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -2110,8 +2130,8 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "arrow-schema", @@ -2127,8 +2147,8 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "async-trait", @@ -2150,8 +2170,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "49.0.2" -source = "git+https://github.com/polygon-io/arrow-datafusion?rev=faca92d#faca92de45a5b35e7101c043c90a26f3fce7ed86" +version = "50.0.0" +source = "git+https://github.com/polygon-io/arrow-datafusion?rev=5a85b7d#5a85b7d41638997eca683a146e535c0bf127bc68" dependencies = [ "arrow", "bigdecimal", @@ -4330,9 +4350,9 @@ dependencies = [ [[package]] name = "parquet" -version = "55.2.0" +version = "56.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" +checksum = "89b56b41d1bd36aae415e42f91cae70ee75cf6cba74416b14dce3e958d5990ec" dependencies = [ "ahash", "arrow-array", @@ -5551,9 +5571,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.55.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" dependencies = [ "log", "sqlparser_derive", @@ -6145,13 +6165,15 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tpchgen" -version = "1.1.1" -source = "git+https://github.com/clflushopt/tpchgen-rs.git?rev=d849ff430cd52250f6891ed4d5e3adad77bb2698#d849ff430cd52250f6891ed4d5e3adad77bb2698" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5315a6fb66b84b9dc4ade3ce3d85fc246305c87a8106193e9565b8a45394cbe" [[package]] name = "tpchgen-arrow" -version = "1.1.1" -source = "git+https://github.com/clflushopt/tpchgen-rs.git?rev=d849ff430cd52250f6891ed4d5e3adad77bb2698#d849ff430cd52250f6891ed4d5e3adad77bb2698" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e70da6e86f54ff3524628dc84cb0a9e674ea28e8a8a82fe3839af8c7fd743b" dependencies = [ "arrow", "tpchgen", @@ -6589,6 +6611,8 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common", "datafusion-physical-plan", "futures", "itertools 0.14.0", diff --git a/Cargo.toml b/Cargo.toml index 8f4a57cbfe0..3d34daebe77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,18 +59,19 @@ anyhow = "1.0.95" arbitrary = "1.3.2" arcref = "0.2.0" arrayref = "0.3.7" -arrow-arith = "55.2.0" -arrow-array = "55.2.0" -arrow-buffer = "55.2.0" -arrow-cast = "55.2.0" -arrow-data = "55.2.0" -arrow-ipc = "55.2.0" -arrow-ord = "55.2.0" -arrow-schema = "55.2.0" -arrow-select = "55.2.0" -arrow-string = "55.2.0" +arrow-arith = "56" +arrow-array = "56" +arrow-buffer = "56" +arrow-cast = "56" +arrow-data = "56" +arrow-ipc = "56" +arrow-ord = "56" +arrow-schema = "56" +arrow-select = "56" +arrow-string = "56" +async-compat = "0.2.5" async-stream = "0.3.6" -async-trait = "0.1.88" +async-trait = "0.1.89" bindgen = "0.72.0" bit-vec = "0.8.0" bitvec = "1.0.1" @@ -86,15 +87,17 @@ crossbeam-deque = "0.8.6" crossbeam-queue = "0.3.12" crossterm = "0.29" dashmap = "6.1.0" -datafusion = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d", default-features = false } -datafusion-catalog = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-common = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-common-runtime = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-datasource = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d", default-features = false } -datafusion-execution = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-expr = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-physical-expr = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } -datafusion-physical-plan = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "faca92d" } +datafusion = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d", default-features = false } +datafusion-catalog = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-common = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-common-runtime = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-datasource = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d", default-features = false } +datafusion-execution = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-expr = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-physical-expr = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-physical-expr-adapter = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-physical-expr-common = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } +datafusion-physical-plan = { git = "https://github.com/polygon-io/arrow-datafusion", rev = "5a85b7d" } dirs = "6.0.0" divan = { package = "codspeed-divan-compat", version = "3.0" } dyn-hash = "0.2.0" @@ -137,7 +140,7 @@ opentelemetry = "0.30.0" opentelemetry-otlp = "0.30.0" opentelemetry_sdk = "0.30.0" parking_lot = { version = "0.12.3", features = ["nightly"] } -parquet = "55.2.0" +parquet = "56" paste = "1.0.15" pco = "0.4.4" pin-project = "1.1.5" @@ -183,17 +186,16 @@ target-lexicon = "0.13" tempfile = "3" termtree = { version = "0.5" } thiserror = "2.0.3" -tokio = { version = "1.46" } +tokio = { version = "1.47" } tokio-stream = "0.1.17" tokio-util = { version = "0.7.16" } -# replace these with releases -tpchgen = { git = "https://github.com/clflushopt/tpchgen-rs.git", rev = "d849ff430cd52250f6891ed4d5e3adad77bb2698" } -tpchgen-arrow = { git = "https://github.com/clflushopt/tpchgen-rs.git", rev = "d849ff430cd52250f6891ed4d5e3adad77bb2698" } +tpchgen = { version = "2" } +tpchgen-arrow = { version = "2" } tracing = { version = "0.1.41" } tracing-perfetto = "0.1.5" tracing-subscriber = "0.3.20" -url = "2.5.4" -uuid = { version = "1.17", features = ["js"] } +url = "2.5.7" +uuid = { version = "1.18", features = ["js"] } walkdir = "2.5.0" wasm-bindgen-futures = "0.4.39" witchcraft-metrics = "1.0.1" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index c88cd85f158..cba0256205c 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -29,6 +29,7 @@ datafusion = { workspace = true, features = [ "parquet", "datetime_expressions", "nested_expressions", + "unicode_expressions", ] } datafusion-common = { workspace = true } datafusion-physical-plan = { workspace = true } diff --git a/vortex-array/src/arrow/compute/to_arrow/canonical.rs b/vortex-array/src/arrow/compute/to_arrow/canonical.rs index ec9996183a9..aae6af17935 100644 --- a/vortex-array/src/arrow/compute/to_arrow/canonical.rs +++ b/vortex-array/src/arrow/compute/to_arrow/canonical.rs @@ -10,6 +10,7 @@ use arrow_array::types::{ }; use arrow_array::{ Array, ArrayRef as ArrowArrayRef, ArrowPrimitiveType, BooleanArray as ArrowBoolArray, + Decimal32Array as ArrowDecimal32Array, Decimal64Array as ArrowDecimal64Array, Decimal128Array as ArrowDecimal128Array, Decimal256Array as ArrowDecimal256Array, GenericByteArray, GenericByteViewArray, GenericListArray, NullArray as ArrowNullArray, OffsetSizeTrait, PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray, @@ -110,6 +111,34 @@ impl Kernel for ToArrowCanonical { { to_arrow_primitive::(array) } + (Canonical::Decimal(array), DataType::Decimal32(precision, scale)) => { + if array.decimal_dtype().precision() != *precision + || array.decimal_dtype().scale() != *scale + { + vortex_bail!( + "ToArrowCanonical: target precision/scale {}/{} does not match array precision/scale {}/{}", + precision, + scale, + array.decimal_dtype().precision(), + array.decimal_dtype().scale() + ); + } + to_arrow_decimal32(array) + } + (Canonical::Decimal(array), DataType::Decimal64(precision, scale)) => { + if array.decimal_dtype().precision() != *precision + || array.decimal_dtype().scale() != *scale + { + vortex_bail!( + "ToArrowCanonical: target precision/scale {}/{} does not match array precision/scale {}/{}", + precision, + scale, + array.decimal_dtype().precision(), + array.decimal_dtype().scale() + ); + } + to_arrow_decimal64(array) + } (Canonical::Decimal(array), DataType::Decimal128(precision, scale)) => { if array.decimal_dtype().precision() != *precision || array.decimal_dtype().scale() != *scale @@ -217,6 +246,91 @@ fn to_arrow_primitive(array: PrimitiveArray) -> VortexRes ))) } +fn to_arrow_decimal32(array: DecimalArray) -> VortexResult { + let null_buffer = array.validity_mask().to_null_buffer(); + let buffer: Buffer = match array.values_type() { + DecimalValueType::I8 => { + Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) + } + DecimalValueType::I16 => { + Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) + } + DecimalValueType::I32 => array.buffer::(), + DecimalValueType::I64 => array + .buffer::() + .into_iter() + .map(|x| { + x.to_i32() + .ok_or_else(|| vortex_err!("i64 to i32 narrowing cannot be done safely")) + }) + .process_results(|iter| Buffer::from_trusted_len_iter(iter))?, + DecimalValueType::I128 => array + .buffer::() + .into_iter() + .map(|x| { + x.to_i32() + .ok_or_else(|| vortex_err!("i128 to i32 narrowing cannot be done safely")) + }) + .process_results(|iter| Buffer::from_trusted_len_iter(iter))?, + DecimalValueType::I256 => array + .buffer::() + .into_iter() + .map(|x| { + x.to_i32() + .ok_or_else(|| vortex_err!("i256 to i32 narrowing cannot be done safely")) + }) + .process_results(|iter| Buffer::from_trusted_len_iter(iter))?, + _ => vortex_bail!("unknown value type {:?}", array.values_type()), + }; + Ok(Arc::new( + ArrowDecimal32Array::new(buffer.into_arrow_scalar_buffer(), null_buffer) + .with_precision_and_scale( + array.decimal_dtype().precision(), + array.decimal_dtype().scale(), + )?, + )) +} + +fn to_arrow_decimal64(array: DecimalArray) -> VortexResult { + let null_buffer = array.validity_mask().to_null_buffer(); + let buffer: Buffer = match array.values_type() { + DecimalValueType::I8 => { + Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) + } + DecimalValueType::I16 => { + Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) + } + DecimalValueType::I32 => { + Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) + } + DecimalValueType::I64 => array.buffer::(), + DecimalValueType::I128 => array + .buffer::() + .into_iter() + .map(|x| { + x.to_i64() + .ok_or_else(|| vortex_err!("i128 to i64 narrowing cannot be done safely")) + }) + .process_results(|iter| Buffer::from_trusted_len_iter(iter))?, + DecimalValueType::I256 => array + .buffer::() + .into_iter() + .map(|x| { + x.to_i64() + .ok_or_else(|| vortex_err!("i256 to i64 narrowing cannot be done safely")) + }) + .process_results(|iter| Buffer::from_trusted_len_iter(iter))?, + _ => vortex_bail!("unknown value type {:?}", array.values_type()), + }; + Ok(Arc::new( + ArrowDecimal64Array::new(buffer.into_arrow_scalar_buffer(), null_buffer) + .with_precision_and_scale( + array.decimal_dtype().precision(), + array.decimal_dtype().scale(), + )?, + )) +} + fn to_arrow_decimal128(array: DecimalArray) -> VortexResult { let null_buffer = array.validity_mask().to_null_buffer(); let buffer: Buffer = match array.values_type() { @@ -540,6 +654,60 @@ mod tests { assert_eq!(arrow_decimal.value(2), 12); } + #[rstest] + #[case(0i8)] + #[case(0i16)] + #[case(0i32)] + #[case(0i64)] + #[case(0i128)] + #[case(vortex_scalar::i256::ZERO)] + fn to_arrow_decimal32(#[case] _decimal_type: T) { + use arrow_array::Decimal32Array; + + let mut decimal = DecimalBuilder::new::(2, 1, false.into()); + decimal.append_value(10); + decimal.append_value(11); + decimal.append_value(12); + + let decimal = decimal.finish(); + + let arrow_array = decimal.into_arrow(&DataType::Decimal32(2, 1)).unwrap(); + let arrow_decimal = arrow_array + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(arrow_decimal.value(0), 10); + assert_eq!(arrow_decimal.value(1), 11); + assert_eq!(arrow_decimal.value(2), 12); + } + + #[rstest] + #[case(0i8)] + #[case(0i16)] + #[case(0i32)] + #[case(0i64)] + #[case(0i128)] + #[case(vortex_scalar::i256::ZERO)] + fn to_arrow_decimal64(#[case] _decimal_type: T) { + use arrow_array::Decimal64Array; + + let mut decimal = DecimalBuilder::new::(2, 1, false.into()); + decimal.append_value(10); + decimal.append_value(11); + decimal.append_value(12); + + let decimal = decimal.finish(); + + let arrow_array = decimal.into_arrow(&DataType::Decimal64(2, 1)).unwrap(); + let arrow_decimal = arrow_array + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(arrow_decimal.value(0), 10); + assert_eq!(arrow_decimal.value(1), 11); + assert_eq!(arrow_decimal.value(2), 12); + } + #[rstest] #[case(0i8)] #[case(0i16)] diff --git a/vortex-array/src/arrow/convert.rs b/vortex-array/src/arrow/convert.rs index 93fc95a9830..69c57da1c3c 100644 --- a/vortex-array/src/arrow/convert.rs +++ b/vortex-array/src/arrow/convert.rs @@ -5,11 +5,11 @@ use std::sync::Arc; use arrow_array::cast::{AsArray, as_null_array}; use arrow_array::types::{ - ByteArrayType, ByteViewType, Date32Type, Date64Type, Decimal128Type, Decimal256Type, - Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, - Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, - TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, - TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, + ByteArrayType, ByteViewType, Date32Type, Date64Type, Decimal32Type, Decimal64Type, + Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, + Int32Type, Int64Type, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, + Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, }; use arrow_array::{ Array as ArrowArray, ArrowPrimitiveType, BooleanArray as ArrowBooleanArray, GenericByteArray, @@ -103,6 +103,24 @@ impl_from_arrow_primitive!(Float16Type); impl_from_arrow_primitive!(Float32Type); impl_from_arrow_primitive!(Float64Type); +impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { + fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { + let decimal_type = DecimalDType::new(array.precision(), array.scale()); + let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone()); + let validity = nulls(array.nulls(), nullable); + DecimalArray::new(buffer, decimal_type, validity).into_array() + } +} + +impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { + fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { + let decimal_type = DecimalDType::new(array.precision(), array.scale()); + let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone()); + let validity = nulls(array.nulls(), nullable); + DecimalArray::new(buffer, decimal_type, validity).into_array() + } +} + impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { let decimal_type = DecimalDType::new(array.precision(), array.scale()); @@ -413,6 +431,12 @@ impl FromArrowArray<&dyn ArrowArray> for ArrayRef { } ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(), }, + DataType::Decimal32(..) => { + Self::from_arrow(array.as_primitive::(), nullable) + } + DataType::Decimal64(..) => { + Self::from_arrow(array.as_primitive::(), nullable) + } DataType::Decimal128(..) => { Self::from_arrow(array.as_primitive::(), nullable) } diff --git a/vortex-datafusion/Cargo.toml b/vortex-datafusion/Cargo.toml index 2405fabc9d2..060374ea45c 100644 --- a/vortex-datafusion/Cargo.toml +++ b/vortex-datafusion/Cargo.toml @@ -24,6 +24,8 @@ datafusion-datasource = { workspace = true, default-features = false } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } +datafusion-physical-expr-adapter = { workspace = true } +datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } futures = { workspace = true } itertools = { workspace = true } diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index 317200167cb..8f998044e8f 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use arrow_schema::{DataType, Schema}; use datafusion_expr::Operator as DFOperator; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; +use datafusion_physical_expr_common::physical_expr::is_dynamic_physical_expr; use datafusion_physical_plan::expressions as df_expr; use vortex::error::{VortexResult, vortex_bail, vortex_err}; use vortex::expr::{BinaryExpr, ExprRef, LikeExpr, Operator, and, get_item, lit, root}; @@ -122,6 +123,12 @@ impl TryFromDataFusion for Operator { } pub(crate) fn can_be_pushed_down(expr: &PhysicalExprRef, schema: &Schema) -> bool { + // We currently do not support pushdown of dynamic expressions in DF. + // See issue: https://github.com/vortex-data/vortex/issues/4034 + if is_dynamic_physical_expr(expr) { + return false; + } + let expr = expr.as_any(); if let Some(binary) = expr.downcast_ref::() { can_binary_be_pushed_down(binary, schema) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 2abf31f661c..c51977657e3 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -10,9 +10,9 @@ use datafusion_datasource::file_meta::FileMeta; use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use datafusion_datasource::{FileRange, PartitionedFile}; -use datafusion_physical_expr::schema_rewriter::PhysicalExprAdapterFactory; use datafusion_physical_expr::simplifier::PhysicalExprSimplifier; use datafusion_physical_expr::{PhysicalExprRef, split_conjunction}; +use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use futures::{FutureExt, StreamExt, TryStreamExt, stream}; use object_store::ObjectStore; use object_store::path::Path; @@ -215,9 +215,7 @@ impl FileOpener for VortexOpener { )))) }) .try_flatten() - .map(move |batch| { - batch.and_then(|b| schema_mapping.map_batch(b).map_err(Into::into)) - }) + .map(move |batch| batch.and_then(|b| schema_mapping.map_batch(b))) .boxed(); Ok(stream) @@ -256,7 +254,6 @@ fn byte_range_to_row_range(byte_range: Range, row_count: u64, total_size: u #[cfg(test)] mod tests { use chrono::Utc; - use datafusion::arrow; use datafusion::arrow::array::RecordBatch; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::util::pretty::print_batches; @@ -264,7 +261,7 @@ mod tests { use datafusion::datasource::schema_adapter::DefaultSchemaAdapterFactory; use datafusion::logical_expr::{col, lit}; use datafusion::physical_expr::planner::logical2physical; - use datafusion::physical_expr::schema_rewriter::DefaultPhysicalExprAdapterFactory; + use datafusion::physical_expr_adapter::DefaultPhysicalExprAdapterFactory; use datafusion::scalar::ScalarValue; use futures::stream::BoxStream; use itertools::Itertools; @@ -335,7 +332,7 @@ mod tests { } async fn count_data( - mut stream: BoxStream<'static, Result>, + mut stream: BoxStream<'static, Result>, ) -> anyhow::Result<(usize, usize)> { let mut batches = vec![]; diff --git a/vortex-datafusion/src/persistent/source.rs b/vortex-datafusion/src/persistent/source.rs index 369b493fb5f..07c644f0c0d 100644 --- a/vortex-datafusion/src/persistent/source.rs +++ b/vortex-datafusion/src/persistent/source.rs @@ -12,10 +12,10 @@ use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::file_stream::FileOpener; use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapterFactory}; -use datafusion_physical_expr::schema_rewriter::{ +use datafusion_physical_expr::{PhysicalExprRef, conjunction}; +use datafusion_physical_expr_adapter::{ DefaultPhysicalExprAdapterFactory, PhysicalExprAdapterFactory, }; -use datafusion_physical_expr::{PhysicalExprRef, conjunction}; use datafusion_physical_plan::filter_pushdown::{ FilterPushdownPropagation, PushedDown, PushedDownPredicate, }; diff --git a/vortex-dtype/src/arrow.rs b/vortex-dtype/src/arrow.rs index 528e7e687e0..e73545b49e6 100644 --- a/vortex-dtype/src/arrow.rs +++ b/vortex-dtype/src/arrow.rs @@ -15,9 +15,7 @@ use std::sync::Arc; -use arrow_schema::{ - DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef, -}; +use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef}; use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err}; use crate::datetime::arrow::{make_arrow_temporal_dtype, make_temporal_ext_dtype}; @@ -61,8 +59,11 @@ impl TryFromArrowType<&DataType> for PType { impl TryFromArrowType<&DataType> for DecimalDType { fn try_from_arrow(value: &DataType) -> VortexResult { match value { - DataType::Decimal128(precision, scale) => Self::try_new(*precision, *scale), - DataType::Decimal256(precision, scale) => Self::try_new(*precision, *scale), + DataType::Decimal32(precision, scale) + | DataType::Decimal64(precision, scale) + | DataType::Decimal128(precision, scale) + | DataType::Decimal256(precision, scale) => Self::try_new(*precision, *scale), + _ => Err(vortex_err!( "Arrow datatype {:?} cannot be converted to DecimalDType", value @@ -108,7 +109,10 @@ impl FromArrowType<(&DataType, Nullability)> for DType { match data_type { DataType::Null => DType::Null, - DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => { + DataType::Decimal32(precision, scale) + | DataType::Decimal64(precision, scale) + | DataType::Decimal128(precision, scale) + | DataType::Decimal256(precision, scale) => { DType::Decimal(DecimalDType::new(*precision, *scale), nullability) } DataType::Boolean => DType::Bool(nullability), @@ -188,10 +192,19 @@ impl DType { PType::F64 => DataType::Float64, }, DType::Decimal(dt, _) => { - if dt.precision() > DECIMAL128_MAX_PRECISION { - DataType::Decimal256(dt.precision(), dt.scale()) - } else { - DataType::Decimal128(dt.precision(), dt.scale()) + let precision = dt.precision(); + let scale = dt.scale(); + + match precision { + // This code is commented out until DataFusion improves its support for smaller decimals. + // // DECIMAL32_MAX_PRECISION + // 0..=9 => DataType::Decimal32(precision, scale), + // // DECIMAL64_MAX_PRECISION + // 10..=18 => DataType::Decimal64(precision, scale), + // DECIMAL128_MAX_PRECISION + 0..=38 => DataType::Decimal128(precision, scale), + // DECIMAL256_MAX_PRECISION + 39.. => DataType::Decimal256(precision, scale), } } DType::Utf8(_) => DataType::Utf8View,