diff --git a/native/Cargo.lock b/native/Cargo.lock index 9709cd6f17..3174f9b508 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -135,9 +135,12 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] [[package]] name = "array-init" @@ -417,19 +420,15 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.19" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" dependencies = [ - "bzip2 0.5.2", - "flate2", + "compression-codecs", + "compression-core", "futures-core", - "memchr", "pin-project-lite", "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -481,9 +480,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.4.1" +version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ "event-listener 5.4.1", "event-listener-strategy", @@ -530,7 +529,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -817,9 +816,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.6" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -946,9 +945,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" dependencies = [ "autocfg", "libm", @@ -1003,7 +1002,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1096,7 +1095,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1119,7 +1118,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1199,15 +1198,6 @@ dependencies = [ "either", ] -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - [[package]] name = "bzip2" version = "0.6.1" @@ -1217,16 +1207,6 @@ dependencies = [ "libbz2-rs-sys", ] -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "cast" version = "0.3.0" @@ -1235,9 +1215,9 @@ checksum = 
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.50" +version = "1.2.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" +checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" dependencies = [ "find-msvc-tools", "jobserver", @@ -1336,18 +1316,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.53" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.53" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ "anstyle", "clap_lex", @@ -1388,6 +1368,27 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compression-codecs" +version = "0.4.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +dependencies = [ + "bzip2", + "compression-core", + "flate2", + "liblzma", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1618,7 +1619,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1632,7 +1633,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1643,7 +1644,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1654,7 +1655,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1674,8 +1675,7 @@ dependencies = [ [[package]] name = "datafusion" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "arrow-schema", @@ -1715,7 +1715,6 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", - "rstest", "sqlparser", "tempfile", "tokio", @@ -1726,8 +1725,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -1751,8 +1749,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +source = 
"git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -1769,7 +1766,6 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] @@ -1891,15 +1887,14 @@ dependencies = [ [[package]] name = "datafusion-common" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", "chrono", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "hex", "indexmap 2.12.1", "libc", @@ -1915,8 +1910,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "futures", "log", @@ -1926,14 +1920,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.1", + "bzip2", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1948,21 +1941,20 @@ dependencies = [ "futures", "glob", "itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", "tokio", "tokio-util", "url", - "xz2", "zstd", ] [[package]] name = "datafusion-datasource-arrow" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "arrow-ipc", @@ -1985,8 +1977,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -2008,8 +1999,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -2030,8 +2020,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -2060,17 +2049,16 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "51.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" [[package]] name = "datafusion-execution" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -2087,8 +2075,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -2109,8 +2096,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "datafusion-common", @@ -2122,8 +2108,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "arrow-buffer", @@ -2131,6 +2116,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2152,8 +2138,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", @@ -2173,8 +2158,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", @@ -2186,8 +2170,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "arrow-ord", @@ -2209,8 +2192,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" +source = 
"git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "async-trait", @@ -2225,8 +2207,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "datafusion-common", @@ -2243,8 +2224,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2253,19 +2233,17 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "datafusion-optimizer" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "chrono", @@ -2283,8 +2261,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", @@ -2294,19 +2271,19 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "itertools 0.14.0", "parking_lot", "paste", "petgraph 0.8.3", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "datafusion-common", @@ -2320,22 +2297,23 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", + "indexmap 2.12.1", "itertools 0.14.0", + "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "datafusion-common", @@ -2352,26 +2330,25 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "itertools 0.14.0", "log", @@ -2383,8 +2360,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "datafusion-common", @@ -2400,8 +2376,7 @@ dependencies = [ [[package]] name = "datafusion-session" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "async-trait", "datafusion-common", @@ -2414,8 +2389,7 @@ dependencies = [ [[package]] name = "datafusion-spark" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a8d6fed24c80dd403dcc6afec33766a599d1b72575f222237f01429b2e58ba" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "bigdecimal", @@ -2426,7 +2400,9 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-nested", "log", + "percent-encoding", "rand 0.9.2", "sha1", "url", @@ -2435,8 +2411,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" +source = "git+https://github.com/apache/datafusion?branch=branch-52#aee5cd9f3517b2ac9536fd4eb254f1e1349711df" dependencies = [ "arrow", "bigdecimal", @@ -2486,7 +2461,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2496,28 +2471,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "derive_more" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" dependencies = [ "derive_more-impl", ] 
[[package]] name = "derive_more-impl" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.111", + "syn 2.0.114", "unicode-xid", ] @@ -2547,7 +2522,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2600,7 +2575,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2664,9 +2639,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" [[package]] name = "findshlibs" @@ -2688,9 +2663,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ "bitflags 2.10.0", "rustc_version", @@ -2719,6 +2694,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2823,7 +2804,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2838,12 +2819,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" -[[package]] -name = "futures-timer" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" - [[package]] name = "futures-util" version = "0.3.31" @@ -2925,9 +2900,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -2968,10 +2943,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash 0.8.12", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -2979,7 +2950,7 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -2987,6 +2958,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hdfs-sys" @@ -3421,9 +3397,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -3460,9 +3436,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "java-locator" @@ -3475,9 +3451,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3490,13 +3466,13 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3668,6 +3644,26 @@ dependencies = [ "windows-link", ] +[[package]] +name = "liblzma" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" +dependencies = [ + "liblzma-sys", +] + +[[package]] +name = "liblzma-sys" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.15" @@ -3686,9 +3682,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c" +checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" dependencies = [ "zlib-rs", ] @@ -3780,17 +3776,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "md-5" version = "0.10.6" @@ -3860,9 +3845,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.11" +version = "0.12.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" +checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" dependencies = [ "async-lock", "crossbeam-channel", @@ -3873,7 +3858,6 @@ dependencies = [ "futures-util", "parking_lot", "portable-atomic", - "rustc_version", "smallvec", "tagptr", "uuid", @@ -4110,9 +4094,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.6" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" [[package]] name = "ordered-float" @@ -4337,7 +4321,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -4413,9 +4397,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] name = "portable-atomic-util" @@ -4479,7 +4463,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -4493,9 +4477,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -4550,7 +4534,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.111", + "syn 2.0.114", "tempfile", ] @@ -4564,7 +4548,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -4688,9 +4672,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -4812,7 +4796,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -4850,12 +4834,6 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" -[[package]] -name = "relative-path" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" - [[package]] name = "rend" version = "0.4.2" @@ -4962,9 +4940,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" dependencies = [ "bitvec", 
"bytecheck", @@ -4980,9 +4958,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", "quote", @@ -4999,35 +4977,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "rstest" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" -dependencies = [ - "futures-timer", - "futures-util", - "rstest_macros", -] - -[[package]] -name = "rstest_macros" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" -dependencies = [ - "cfg-if", - "glob", - "proc-macro-crate", - "proc-macro2", - "quote", - "regex", - "relative-path", - "rustc_version", - "syn 2.0.111", - "unicode-ident", -] - [[package]] name = "rust-ini" version = "0.21.3" @@ -5109,9 +5058,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "once_cell", @@ -5124,9 +5073,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5173,9 +5122,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" [[package]] name = "same-file" @@ -5209,9 +5158,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" +checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" dependencies = [ "dyn-clone", "ref-cast", @@ -5313,14 +5262,14 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "serde_json" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", @@ -5337,7 +5286,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5364,7 +5313,7 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.12.1", "schemars 0.9.0", - "schemars 1.1.0", + "schemars 1.2.0", 
"serde_core", "serde_json", "serde_with_macros", @@ -5380,7 +5329,7 @@ dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5426,10 +5375,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -5506,7 +5456,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5545,7 +5495,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5590,9 +5540,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -5616,7 +5566,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5670,7 +5620,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5681,7 +5631,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5836,7 +5786,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5851,9 +5801,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -5956,7 +5906,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6000,7 +5950,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6065,9 +6015,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -6196,7 +6146,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "wasm-bindgen-shared", ] @@ -6244,9 +6194,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] @@ -6315,7 +6265,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6326,7 +6276,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6620,15 +6570,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yoke" version = "0.8.1" @@ -6648,28 +6589,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6689,7 +6630,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] @@ -6729,20 +6670,20 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "zlib-rs" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235" +checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" [[package]] name = "zmij" -version = "1.0.2" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" [[package]] name = "zstd" diff --git a/native/Cargo.toml b/native/Cargo.toml index 554534cc2b..4adf29a6d8 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -38,9 +38,9 @@ arrow = { version = "57.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.10.0" } parquet = { version = "57.0.0", default-features = false, features = ["experimental"] } -datafusion = { version = "51.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-datasource = { version = "51.0.0" } -datafusion-spark = { version = "51.0.0" } 
+datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-52", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-datasource = { git = "https://github.com/apache/datafusion", branch = "branch-52" } +datafusion-spark = { git = "https://github.com/apache/datafusion", branch = "branch-52" } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-proto = { path = "proto" } chrono = { version = "0.4", default-features = false, features = ["clock"] } diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 7b32be36a2..425366404a 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -95,7 +95,7 @@ jni = { version = "0.21", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { version = "51.0.0" } +datafusion-functions-nested = { git = "https://github.com/apache/datafusion", branch = "branch-52" } [features] backtrace = ["datafusion/backtrace"] diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index ff6648850b..f46b8f91c2 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -42,10 +42,9 @@ use iceberg::io::FileIO; use crate::execution::operators::ExecutionError; use crate::parquet::parquet_support::SparkParquetOptions; -use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; -use datafusion::datasource::schema_adapter::SchemaAdapterFactory; use datafusion_comet_spark_expr::EvalMode; use datafusion_datasource::file_stream::FileStreamMetrics; +use crate::parquet::schema_adapter::adapt_batch_with_expressions; /// Iceberg table scan operator that uses iceberg-rust to read Iceberg tables. 
/// @@ -299,22 +298,14 @@ impl IcebergFileStream { .map_err(|e| DataFusionError::Execution(format!("Iceberg scan error: {}", e))) .and_then(move |batch| { let spark_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false); - let adapter_factory = SparkSchemaAdapterFactory::new(spark_options, None); - let file_schema = batch.schema(); - let adapter = adapter_factory - .create(Arc::clone(&target_schema), Arc::clone(&file_schema)); - - let result = match adapter.map_schema(file_schema.as_ref()) { - Ok((schema_mapper, _projection)) => { - schema_mapper.map_batch(batch).map_err(|e| { - DataFusionError::Execution(format!("Batch mapping failed: {}", e)) - }) - } - Err(e) => Err(DataFusionError::Execution(format!( - "Schema mapping failed: {}", - e - ))), - }; + let result = + adapt_batch_with_expressions(batch, &target_schema, &spark_options) + .map_err(|e| { + DataFusionError::Execution(format!( + "Batch adaptation failed: {}", + e + )) + }); futures::future::ready(result) }); diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 93fbb59c11..92af398b4e 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1049,8 +1049,8 @@ impl PhysicalPlanner { .as_any() .downcast_ref::<DataFusionLiteral>() .ok_or_else(|| { - GeneralError("Expected literal of default value.".to_string()) - })?; + GeneralError("Expected literal of default value.".to_string()) + })?; Ok(df_literal.value().clone()) }) .collect(); @@ -1093,18 +1093,11 @@ impl PhysicalPlanner { let files = self.get_partitioned_files(&scan.file_partitions[self.partition as usize])?; let file_groups: Vec<Vec<PartitionedFile>> = vec![files]; - let partition_fields: Vec<Field> = partition_schema - .fields() - .iter() - .map(|field| { - Field::new(field.name(), field.data_type().clone(), field.is_nullable()) - }) - .collect_vec(); + let scan = init_datasource_exec( required_schema, Some(data_schema), Some(partition_schema), - Some(partition_fields), object_store_url, file_groups, Some(projection_vector), @@ -3385,6 +3378,7 @@ mod tests { use arrow::array::{Array, DictionaryArray, Int32Array, ListArray, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema}; use datafusion::catalog::memory::DataSourceExec; + use datafusion::config::TableParquetOptions; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{ @@ -3401,8 +3395,6 @@ use crate::execution::operators::ExecutionError; use crate::execution::planner::literal_to_array_ref; - use crate::parquet::parquet_support::SparkParquetOptions; - use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; use datafusion_comet_proto::spark_expression::expr::ExprStruct; use datafusion_comet_proto::spark_expression::ListLiteral; use datafusion_comet_proto::{ @@ -3412,7 +3404,6 @@ spark_operator, spark_operator::{operator::OpStruct, Operator}, }; - use datafusion_comet_spark_expr::EvalMode; #[test] fn test_unpack_dictionary_primitive() { @@ -4004,18 +3995,15 @@ } } - let source = ParquetSource::default().with_schema_adapter_factory(Arc::new( - SparkSchemaAdapterFactory::new( - SparkParquetOptions::new(EvalMode::Ansi, "", false), - None, - ), - ))?; + let source = Arc::new( + ParquetSource::new(Arc::new(read_schema.clone())) .with_table_parquet_options(TableParquetOptions::new()), + ) as Arc<dyn FileSource>; let object_store_url = ObjectStoreUrl::local_filesystem(); - let file_scan_config = -
FileScanConfigBuilder::new(object_store_url, read_schema.into(), source) - .with_file_groups(file_groups) - .build(); + let file_scan_config = FileScanConfigBuilder::new(object_store_url, source) + .with_file_groups(file_groups) + .build(); // Run native read let scan = Arc::new(DataSourceExec::new(Arc::new(file_scan_config.clone()))); diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs index c8a480e97a..4934a5ad4b 100644 --- a/native/core/src/parquet/mod.rs +++ b/native/core/src/parquet/mod.rs @@ -765,7 +765,6 @@ pub unsafe extern "system" fn Java_org_apache_comet_parquet_Native_initRecordBat required_schema, Some(data_schema), None, - None, object_store_url, file_groups, None, diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs index ec18d227f5..92bbaaeeee 100644 --- a/native/core/src/parquet/parquet_exec.rs +++ b/native/core/src/parquet/parquet_exec.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use crate::execution::operators::ExecutionError; use crate::parquet::encryption_support::{CometEncryptionConfig, ENCRYPTION_FACTORY_ID}; use crate::parquet::parquet_support::SparkParquetOptions; -use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; use arrow::datatypes::{Field, SchemaRef}; use datafusion::config::TableParquetOptions; use datafusion::datasource::listing::PartitionedFile; @@ -30,11 +30,12 @@ use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::physical_expr::expressions::BinaryExpr; use datafusion::physical_expr::PhysicalExpr; use datafusion::prelude::SessionContext; -use datafusion::scalar::ScalarValue; use datafusion_comet_spark_expr::EvalMode; -use itertools::Itertools; -use std::collections::HashMap; +use datafusion_datasource::TableSchema; use std::sync::Arc; +use datafusion::physical_expr_adapter::PhysicalExprAdapterFactory; +use datafusion::scalar::ScalarValue; +use crate::parquet::schema_adapter::SparkPhysicalExprAdapterFactory; /// Initializes a DataSourceExec plan with a ParquetSource. This may be used by either the /// `native_datafusion` scan or the `native_iceberg_compat` scan. 
@@ -60,7 +61,6 @@ pub(crate) fn init_datasource_exec( required_schema: SchemaRef, data_schema: Option<SchemaRef>, partition_schema: Option<SchemaRef>, - partition_fields: Option<Vec<Field>>, object_store_url: ObjectStoreUrl, file_groups: Vec<Vec<PartitionedFile>>, projection_vector: Option<Vec<usize>>, @@ -78,7 +78,26 @@ encryption_enabled, ); - let mut parquet_source = ParquetSource::new(table_parquet_options); + // Determine the schema to use for ParquetSource + let table_schema = if let Some(ref data_schema) = data_schema { + if let Some(ref partition_schema) = partition_schema { + let partition_fields: Vec<_> = partition_schema + .fields() + .iter() + .map(|f| { + Arc::new(Field::new(f.name(), f.data_type().clone(), f.is_nullable())) as _ + }) + .collect(); + TableSchema::new(Arc::clone(data_schema), partition_fields) + } else { + TableSchema::from_file_schema(Arc::clone(data_schema)) + } + } else { + TableSchema::from_file_schema(Arc::clone(&required_schema)) + }; + + let mut parquet_source = + ParquetSource::new(table_schema).with_table_parquet_options(table_parquet_options); // Create a conjunctive form of the vector because ParquetExecBuilder takes // a single expression @@ -104,37 +123,26 @@ ); } - let file_source = parquet_source.with_schema_adapter_factory(Arc::new( - SparkSchemaAdapterFactory::new(spark_parquet_options, default_values), - ))?; + let expr_adapter_factory: Arc<dyn PhysicalExprAdapterFactory> = Arc::new( + SparkPhysicalExprAdapterFactory::new(spark_parquet_options, default_values), + ); + + let file_source: Arc<dyn FileSource> = Arc::new(parquet_source); let file_groups = file_groups .iter() .map(|files| FileGroup::new(files.clone())) .collect(); - let file_scan_config = match (data_schema, projection_vector, partition_fields) { - (Some(data_schema), Some(projection_vector), Some(partition_fields)) => { - get_file_config_builder( - data_schema, - partition_schema, - file_groups, - object_store_url, - file_source, - ) - .with_projection_indices(Some(projection_vector)) - .with_table_partition_cols(partition_fields) - .build() - } - _ => get_file_config_builder( - required_schema, - partition_schema, - file_groups, - object_store_url, - file_source, - ) - .build(), - }; + let mut file_scan_config_builder = + FileScanConfigBuilder::new(object_store_url, file_source).with_file_groups(file_groups); + + if let Some(projection_vector) = projection_vector { + file_scan_config_builder = + file_scan_config_builder.with_projection_indices(Some(projection_vector))?; + } + + let file_scan_config = file_scan_config_builder.with_expr_adapter(Some(expr_adapter_factory)).build(); Ok(Arc::new(DataSourceExec::new(Arc::new(file_scan_config)))) } @@ -165,28 +173,3 @@ fn get_options( (table_parquet_options, spark_parquet_options) } - -fn get_file_config_builder( - schema: SchemaRef, - partition_schema: Option<SchemaRef>, - file_groups: Vec<FileGroup>, - object_store_url: ObjectStoreUrl, - file_source: Arc<dyn FileSource>, -) -> FileScanConfigBuilder { - match partition_schema { - Some(partition_schema) => { - let partition_fields: Vec<Field> = partition_schema - .fields() - .iter() - .map(|field| { - Field::new(field.name(), field.data_type().clone(), field.is_nullable()) - }) - .collect_vec(); - FileScanConfigBuilder::new(object_store_url, Arc::clone(&schema), file_source) - .with_file_groups(file_groups) - .with_table_partition_cols(partition_fields) - } - _ => FileScanConfigBuilder::new(object_store_url, Arc::clone(&schema), file_source) - .with_file_groups(file_groups), - } -} diff --git a/native/core/src/parquet/schema_adapter.rs
b/native/core/src/parquet/schema_adapter.rs index b321d902a9..e08b7d7140 100644 --- a/native/core/src/parquet/schema_adapter.rs +++ b/native/core/src/parquet/schema_adapter.rs @@ -15,284 +15,221 @@ // specific language governing permissions and limitations // under the License. -//! Custom schema adapter that uses Spark-compatible conversions - -use crate::parquet::parquet_support::{spark_parquet_convert, SparkParquetOptions}; -use arrow::array::{RecordBatch, RecordBatchOptions}; -use arrow::datatypes::{Schema, SchemaRef}; -use datafusion::common::ColumnStatistics; -use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper}; -use datafusion::physical_plan::ColumnarValue; +//! Spark-compatible schema mapping for runtime batch transformation + +use crate::parquet::parquet_support::SparkParquetOptions; +use arrow::array::{ArrayRef, RecordBatch}; +use arrow::datatypes::SchemaRef; +use datafusion::common::tree_node::TreeNode; +use datafusion::common::tree_node::{Transformed, TransformedResult}; +use datafusion::common::Result as DataFusionResult; +use datafusion::physical_expr::expressions::Column; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_expr_adapter::{replace_columns_with_literals, DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory}; use datafusion::scalar::ScalarValue; +use datafusion_comet_spark_expr::{Cast, SparkCastOptions}; use std::collections::HashMap; use std::sync::Arc; -/// An implementation of DataFusion's `SchemaAdapterFactory` that uses a Spark-compatible -/// `cast` implementation. +/// Factory for creating [`SparkPhysicalExprAdapter`] instances. +/// +/// This replaces the deprecated DataFusion `SchemaAdapterFactory` with a standalone +/// implementation that performs runtime batch transformation using Spark-compatible +/// type conversions. #[derive(Clone, Debug)] -pub struct SparkSchemaAdapterFactory { - /// Spark cast options +pub struct SparkPhysicalExprAdapterFactory { + /// Spark-specific parquet options for type conversions parquet_options: SparkParquetOptions, + /// Default values for columns that may be missing from the physical schema. + /// The key is the column index in the logical schema. default_values: Option<HashMap<usize, ScalarValue>>, } -impl SparkSchemaAdapterFactory { +impl SparkPhysicalExprAdapterFactory { + /// Create a new factory with the given options. pub fn new( - options: SparkParquetOptions, + parquet_options: SparkParquetOptions, default_values: Option<HashMap<usize, ScalarValue>>, ) -> Self { Self { - parquet_options: options, + parquet_options, default_values, } } } -impl SchemaAdapterFactory for SparkSchemaAdapterFactory { - /// Create a new factory for mapping batches from a file schema to a table - /// schema. - /// - /// This is a convenience for [`DefaultSchemaAdapterFactory::create`] with - /// the same schema for both the projected table schema and the table - /// schema.
+impl PhysicalExprAdapterFactory for SparkPhysicalExprAdapterFactory { fn create( &self, - required_schema: SchemaRef, - _table_schema: SchemaRef, - ) -> Box<dyn SchemaAdapter> { - Box::new(SparkSchemaAdapter { - required_schema, + logical_file_schema: SchemaRef, + physical_file_schema: SchemaRef, + ) -> Arc<dyn PhysicalExprAdapter> { + let default_factory = DefaultPhysicalExprAdapterFactory; + let default_adapter = default_factory.create( + Arc::clone(&logical_file_schema), + Arc::clone(&physical_file_schema), + ); + + Arc::new(SparkPhysicalExprAdapter { + logical_file_schema, + physical_file_schema, parquet_options: self.parquet_options.clone(), default_values: self.default_values.clone(), + default_adapter, }) } } -/// This SchemaAdapter requires both the table schema and the projected table -/// schema. See [`SchemaMapping`] for more details -#[derive(Clone, Debug)] -pub struct SparkSchemaAdapter { - /// The schema for the table, projected to include only the fields being output (projected) by the - /// associated ParquetExec - required_schema: SchemaRef, - /// Spark cast options +#[derive(Debug)] +struct SparkPhysicalExprAdapter { + /// The logical schema expected by the query + logical_file_schema: SchemaRef, + /// The physical schema of the actual file being read + physical_file_schema: SchemaRef, + /// Spark-specific options for type conversions parquet_options: SparkParquetOptions, + /// Default values for missing columns (keyed by logical schema index) default_values: Option<HashMap<usize, ScalarValue>>, + /// The default DataFusion adapter to delegate standard handling to + default_adapter: Arc<dyn PhysicalExprAdapter>, } -impl SchemaAdapter for SparkSchemaAdapter { - /// Map a column index in the table schema to a column index in a particular - /// file schema - /// - /// Panics if index is not in range for the table schema - fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option<usize> { - let field = self.required_schema.field(index); - Some( - file_schema - .fields - .iter() - .enumerate() - .find(|(_, b)| { - if self.parquet_options.case_sensitive { - b.name() == field.name() - } else { - b.name().to_lowercase() == field.name().to_lowercase() - } - })? - .0, - ) +impl PhysicalExprAdapter for SparkPhysicalExprAdapter { + fn rewrite(&self, expr: Arc<dyn PhysicalExpr>) -> DataFusionResult<Arc<dyn PhysicalExpr>> { + // Step 1: Handle default values for missing columns + let expr = self.replace_missing_with_defaults(expr)?; + + // Step 2: Delegate to default adapter for standard handling + // This handles: missing columns → nulls, type mismatches → CastColumnExpr + let expr = self.default_adapter.rewrite(expr)?; + + // Step 3: Replace CastColumnExpr with Spark-compatible Cast expressions + expr.transform(|e| self.replace_with_spark_cast(e)).data() } +} - /// Creates a `SchemaMapping` for casting or mapping the columns from the - /// file schema to the table schema. - /// - /// If the provided `file_schema` contains columns of a different type to - /// the expected `table_schema`, the method will attempt to cast the array - /// data from the file schema to the table schema where possible. - /// - /// Returns a [`SchemaMapping`] that can be applied to the output batch - /// along with an ordered list of columns to project from the file - fn map_schema( +impl SparkPhysicalExprAdapter { + /// Replace CastColumnExpr (DataFusion's cast) with Spark's Cast expression.
 
-    /// Creates a `SchemaMapping` for casting or mapping the columns from the
-    /// file schema to the table schema.
-    ///
-    /// If the provided `file_schema` contains columns of a different type to
-    /// the expected `table_schema`, the method will attempt to cast the array
-    /// data from the file schema to the table schema where possible.
-    ///
-    /// Returns a [`SchemaMapping`] that can be applied to the output batch
-    /// along with an ordered list of columns to project from the file
-    fn map_schema(
+impl SparkPhysicalExprAdapter {
+    /// Replace CastColumnExpr (DataFusion's cast) with Spark's Cast expression.
+    fn replace_with_spark_cast(
         &self,
-        file_schema: &Schema,
-    ) -> datafusion::common::Result<(Arc<dyn SchemaMapper>, Vec<usize>)> {
-        let mut projection = Vec::with_capacity(file_schema.fields().len());
-        let mut field_mappings = vec![None; self.required_schema.fields().len()];
-
-        for (file_idx, file_field) in file_schema.fields.iter().enumerate() {
-            if let Some((table_idx, _table_field)) = self
-                .required_schema
-                .fields()
-                .iter()
-                .enumerate()
-                .find(|(_, b)| {
-                    if self.parquet_options.case_sensitive {
-                        b.name() == file_field.name()
-                    } else {
-                        b.name().to_lowercase() == file_field.name().to_lowercase()
-                    }
-                })
-            {
-                field_mappings[table_idx] = Some(projection.len());
-                projection.push(file_idx);
-            }
+        expr: Arc<dyn PhysicalExpr>,
+    ) -> DataFusionResult<Transformed<Arc<dyn PhysicalExpr>>> {
+        // Check for CastColumnExpr (defined in datafusion_physical_expr::expressions)
+        // and replace it with spark_expr::Cast
+        if let Some(cast) = expr.as_any().downcast_ref::<CastColumnExpr>() {
+            let child = cast.expr().clone();
+            let target_type = cast.target_field().data_type().clone();
+
+            // Create Spark-compatible cast options
+            let mut cast_options = SparkCastOptions::new(
+                self.parquet_options.eval_mode,
+                &self.parquet_options.timezone,
+                self.parquet_options.allow_incompat,
+            );
+            cast_options.allow_cast_unsigned_ints = self.parquet_options.allow_cast_unsigned_ints;
+            cast_options.is_adapting_schema = true;
+
+            let spark_cast = Arc::new(Cast::new(child, target_type, cast_options));
+
+            return Ok(Transformed::yes(spark_cast as Arc<dyn PhysicalExpr>));
         }
-        Ok((
-            Arc::new(SchemaMapping {
-                required_schema: Arc::<Schema>::clone(&self.required_schema),
-                field_mappings,
-                parquet_options: self.parquet_options.clone(),
-                default_values: self.default_values.clone(),
-            }),
-            projection,
-        ))
+        Ok(Transformed::no(expr))
     }
-}
 
-// TODO SchemaMapping is mostly copied from DataFusion but calls spark_cast
-// instead of arrow cast - can we reduce the amount of code copied here and make
-// the DataFusion version more extensible?
-
-/// The SchemaMapping struct holds a mapping from the file schema to the table
-/// schema and any necessary type conversions.
-///
-/// Note, because `map_batch` and `map_partial_batch` functions have different
-/// needs, this struct holds two schemas:
-///
-/// 1. The projected **table** schema
-/// 2. The full table schema
-///
-/// [`map_batch`] is used by the ParquetOpener to produce a RecordBatch which
-/// has the projected schema, since that's the schema which is supposed to come
-/// out of the execution of this query. Thus `map_batch` uses
-/// `projected_table_schema` as it can only operate on the projected fields.
-///
-/// [`map_batch`]: Self::map_batch
-#[derive(Debug)]
-pub struct SchemaMapping {
-    /// The schema of the table. This is the expected schema after conversion
-    /// and it should match the schema of the query result.
-    required_schema: SchemaRef,
-    /// Mapping from field index in `projected_table_schema` to index in
-    /// projected file_schema.
-    ///
-    /// They are Options instead of just plain `usize`s because the table could
-    /// have fields that don't exist in the file.
-    field_mappings: Vec<Option<usize>>,
-    /// Spark cast options
-    parquet_options: SparkParquetOptions,
-    default_values: Option<HashMap<usize, ScalarValue>>,
-}
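The point of swapping in Spark's `Cast` rather than keeping Arrow's is semantics: Comet's cast follows Spark's eval-mode-dependent overflow and error behavior and its timezone-aware string/temporal handling. A sketch of building the same cast by hand (not part of the patch; assumes `DataType` and `EvalMode` are in scope):

    fn spark_cast_to_utf8(child: Arc<dyn PhysicalExpr>) -> Arc<dyn PhysicalExpr> {
        let mut options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);
        // Mark the cast as schema adaptation, matching what
        // replace_with_spark_cast sets above.
        options.is_adapting_schema = true;
        Arc::new(Cast::new(child, DataType::Utf8, options))
    }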
 
+    /// Replace references to missing columns with default values.
+    fn replace_missing_with_defaults(
+        &self,
+        expr: Arc<dyn PhysicalExpr>,
+    ) -> DataFusionResult<Arc<dyn PhysicalExpr>> {
+        let Some(defaults) = &self.default_values else {
+            return Ok(expr);
+        };
+
+        if defaults.is_empty() {
+            return Ok(expr);
+        }
+
+        // Convert index-based defaults to name-based for replace_columns_with_literals
+        let name_based: HashMap<&str, &ScalarValue> = defaults
+            .iter()
+            .filter_map(|(idx, val)| {
+                self.logical_file_schema
+                    .fields()
+                    .get(*idx)
+                    .map(|f| (f.name().as_str(), val))
+            })
+            .collect();
+
+        if name_based.is_empty() {
+            return Ok(expr);
+        }
+
+        replace_columns_with_literals(expr, &name_based)
+    }
+}
 
-impl SchemaMapper for SchemaMapping {
-    /// Adapts a `RecordBatch` to match the `projected_table_schema` using the stored mapping and
-    /// conversions. The produced RecordBatch has a schema that contains only the projected
-    /// columns, so if one needs a RecordBatch with a schema that references columns which are not
-    /// in the projected, it would be better to use `map_partial_batch`
-    fn map_batch(&self, batch: RecordBatch) -> datafusion::common::Result<RecordBatch> {
-        let batch_rows = batch.num_rows();
-        let batch_cols = batch.columns().to_vec();
-
-        let cols = self
-            .required_schema
-            // go through each field in the projected schema
-            .fields()
-            .iter()
-            .enumerate()
-            // and zip it with the index that maps fields from the projected table schema to the
-            // projected file schema in `batch`
-            .zip(&self.field_mappings)
-            // and for each one...
-            .map(|((field_idx, field), file_idx)| {
-                file_idx.map_or_else(
-                    // If this field only exists in the table, and not in the file, then we need to
-                    // populate a default value for it.
-                    || {
-                        if self.default_values.is_some() {
-                            // We have a map of default values, see if this field is in there.
-                            if let Some(value) =
-                                self.default_values.as_ref().unwrap().get(&field_idx)
-                            // Default value exists, construct a column from it.
-                            {
-                                let cv = if field.data_type() == &value.data_type() {
-                                    ColumnarValue::Scalar(value.clone())
-                                } else {
-                                    // Data types don't match. This can happen when default values
-                                    // are stored by Spark in a format different than the column's
-                                    // type (e.g., INT32 when the column is DATE32)
-                                    spark_parquet_convert(
-                                        ColumnarValue::Scalar(value.clone()),
-                                        field.data_type(),
-                                        &self.parquet_options,
-                                    )?
-                                };
-                                return cv.into_array(batch_rows);
-                            }
-                        }
-                        // Construct an entire column of nulls. We use the Scalar representation
-                        // for better performance.
-                        let cv =
-                            ColumnarValue::Scalar(ScalarValue::try_new_null(field.data_type())?);
-                        cv.into_array(batch_rows)
-                    },
-                    // However, if it does exist in both, then try to cast it to the correct output
-                    // type
-                    |batch_idx| {
-                        spark_parquet_convert(
-                            ColumnarValue::Array(Arc::clone(&batch_cols[batch_idx])),
-                            field.data_type(),
-                            &self.parquet_options,
-                        )?
-                        .into_array(batch_rows)
-                    },
-                )
-            })
-            .collect::<Result<Vec<_>, _>>()?;
-
-        // Necessary to handle empty batches
-        let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows()));
-
-        let schema = Arc::<Schema>::clone(&self.required_schema);
-        let record_batch = RecordBatch::try_new_with_options(schema, cols, &options)?;
-        Ok(record_batch)
-    }
-
-    fn map_column_statistics(
-        &self,
-        _file_col_statistics: &[ColumnStatistics],
-    ) -> datafusion::common::Result<Vec<ColumnStatistics>> {
-        Ok(vec![])
-    }
-}
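End to end, the default-value path looks like this. A sketch (not part of the patch) assuming a factory whose `default_values` maps logical index 1 to a literal; the schemas are invented:

    fn rewrite_missing_column(
        factory: &SparkPhysicalExprAdapterFactory,
    ) -> DataFusionResult<Arc<dyn PhysicalExpr>> {
        use arrow::datatypes::{DataType, Field, Schema};
        let logical = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int64, false),
            Field::new("flag", DataType::Boolean, true),
        ]));
        // The file predates the `flag` column.
        let physical = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
        let adapter = factory.create(logical, physical);
        // With a default registered for index 1, the column reference comes
        // back as a literal; otherwise the default adapter rewrites it to null.
        adapter.rewrite(Arc::new(Column::new("flag", 1)))
    }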
 
+/// Adapt a `RecordBatch` from its file schema to `target_schema` by rewriting
+/// a column expression per target field and evaluating the rewritten
+/// expressions against the batch.
+pub fn adapt_batch_with_expressions(
+    batch: RecordBatch,
+    target_schema: &SchemaRef,
+    parquet_options: &SparkParquetOptions,
+) -> DataFusionResult<RecordBatch> {
+    let file_schema = batch.schema();
+
+    // If schemas match, no adaptation is needed
+    if file_schema.as_ref() == target_schema.as_ref() {
+        return Ok(batch);
+    }
+
+    // Create the adapter
+    let factory = SparkPhysicalExprAdapterFactory::new(parquet_options.clone(), None);
+    let adapter = factory.create(Arc::clone(target_schema), Arc::clone(&file_schema));
+
+    // Create a column projection expression for each target field and let the
+    // adapter rewrite it against the file schema
+    let projection_exprs: Vec<Arc<dyn PhysicalExpr>> = target_schema
+        .fields()
+        .iter()
+        .map(|field| {
+            let col_expr: Arc<dyn PhysicalExpr> = Arc::new(Column::new_with_schema(
+                field.name(),
+                target_schema.as_ref(),
+            )?);
+            adapter.rewrite(col_expr)
+        })
+        .collect::<DataFusionResult<Vec<_>>>()?;
+
+    // Evaluate the rewritten expressions against the batch
+    let columns: Vec<ArrayRef> = projection_exprs
+        .iter()
+        .map(|expr| expr.evaluate(&batch)?.into_array(batch.num_rows()))
+        .collect::<DataFusionResult<Vec<_>>>()?;
+
+    RecordBatch::try_new(Arc::clone(target_schema), columns).map_err(|e| e.into())
+}
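A hypothetical call site for the new entry point (function name invented for illustration): adapting every batch coming out of a reader to the schema Spark expects.

    fn adapt_all(
        batches: Vec<RecordBatch>,
        required_schema: &SchemaRef,
        options: &SparkParquetOptions,
    ) -> DataFusionResult<Vec<RecordBatch>> {
        batches
            .into_iter()
            .map(|batch| adapt_batch_with_expressions(batch, required_schema, options))
            .collect()
    }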
 
 #[cfg(test)]
 mod test {
     use crate::parquet::parquet_support::SparkParquetOptions;
-    use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
+    use crate::parquet::schema_adapter::adapt_batch_with_expressions;
     use arrow::array::UInt32Array;
     use arrow::array::{Int32Array, StringArray};
-    use arrow::datatypes::SchemaRef;
     use arrow::datatypes::{DataType, Field, Schema};
     use arrow::record_batch::RecordBatch;
-    use datafusion::common::config::TableParquetOptions;
     use datafusion::common::DataFusionError;
-    use datafusion::datasource::listing::PartitionedFile;
-    use datafusion::datasource::physical_plan::FileSource;
-    use datafusion::datasource::physical_plan::{FileGroup, FileScanConfigBuilder, ParquetSource};
-    use datafusion::datasource::source::DataSourceExec;
-    use datafusion::execution::object_store::ObjectStoreUrl;
-    use datafusion::execution::TaskContext;
-    use datafusion::physical_plan::ExecutionPlan;
-    use datafusion_comet_spark_expr::test_common::file_util::get_temp_filename;
     use datafusion_comet_spark_expr::EvalMode;
-    use futures::StreamExt;
-    use parquet::arrow::ArrowWriter;
-    use std::fs::File;
     use std::sync::Arc;
 
-    #[tokio::test]
-    async fn parquet_roundtrip_int_as_string() -> Result<(), DataFusionError> {
+    #[test]
+    fn test_schema_mapper_int_to_string() -> Result<(), DataFusionError> {
         let file_schema = Arc::new(Schema::new(vec![
             Field::new("id", DataType::Int32, false),
             Field::new("name", DataType::Utf8, false),
@@ -308,13 +245,20 @@ mod test {
             Field::new("name", DataType::Utf8, false),
         ]));
 
-        let _ = roundtrip(&batch, required_schema).await?;
+        let spark_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false);
+
+        let result = adapt_batch_with_expressions(batch, &required_schema, &spark_options)?;
+
+        assert_eq!(result.num_rows(), 3);
+        assert_eq!(result.schema(), required_schema);
 
         Ok(())
     }
 
-    #[tokio::test]
-    async fn parquet_roundtrip_unsigned_int() -> Result<(), DataFusionError> {
+    #[test]
+    fn test_schema_mapper_unsigned_int() -> Result<(), DataFusionError> {
         let file_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt32, false)]));
 
         let ids = Arc::new(UInt32Array::from(vec![1, 2, 3])) as Arc<dyn Array>;
@@ -322,45 +266,17 @@
         let required_schema =
             Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
 
-        let _ = roundtrip(&batch, required_schema).await?;
+        let mut spark_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false);
+        spark_options.allow_cast_unsigned_ints = true;
 
-        Ok(())
-    }
+        let result = adapt_batch_with_expressions(batch, &required_schema, &spark_options)?;
 
-    /// Create a Parquet file containing a single batch and then read the batch back using
-    /// the specified required_schema. This will cause the SchemaAdapter code to be used.
-    async fn roundtrip(
-        batch: &RecordBatch,
-        required_schema: SchemaRef,
-    ) -> Result<RecordBatch, DataFusionError> {
-        let filename = get_temp_filename();
-        let filename = filename.as_path().as_os_str().to_str().unwrap().to_string();
-        let file = File::create(&filename)?;
-        let mut writer = ArrowWriter::try_new(file, Arc::clone(&batch.schema()), None)?;
-        writer.write(batch)?;
-        writer.close()?;
-
-        let object_store_url = ObjectStoreUrl::local_filesystem();
-
-        let mut spark_parquet_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false);
-        spark_parquet_options.allow_cast_unsigned_ints = true;
-
-        let parquet_source =
-            ParquetSource::new(TableParquetOptions::new()).with_schema_adapter_factory(
-                Arc::new(SparkSchemaAdapterFactory::new(spark_parquet_options, None)),
-            )?;
-
-        let files = FileGroup::new(vec![PartitionedFile::from_path(filename.to_string())?]);
-        let file_scan_config =
-            FileScanConfigBuilder::new(object_store_url, required_schema, parquet_source)
-                .with_file_groups(vec![files])
-                .build();
-
-        let parquet_exec = DataSourceExec::new(Arc::new(file_scan_config));
-
-        let mut stream = parquet_exec
-            .execute(0, Arc::new(TaskContext::default()))
-            .unwrap();
-        stream.next().await.unwrap()
+        assert_eq!(result.num_rows(), 3);
+        assert_eq!(result.schema(), required_schema);
+
+        Ok(())
     }
 }
diff --git a/native/spark-expr/src/agg_funcs/covariance.rs b/native/spark-expr/src/agg_funcs/covariance.rs
index d40824809a..15759eb155 100644
--- a/native/spark-expr/src/agg_funcs/covariance.rs
+++ b/native/spark-expr/src/agg_funcs/covariance.rs
@@ -23,9 +23,7 @@ use arrow::{
     compute::cast,
     datatypes::{DataType, Field},
 };
-use datafusion::common::{
-    downcast_value, unwrap_or_internal_err, DataFusionError, Result, ScalarValue,
-};
+use datafusion::common::{downcast_value, unwrap_or_internal_err, Result, ScalarValue};
 use datafusion::logical_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion::logical_expr::type_coercion::aggregates::NUMERICS;
 use datafusion::logical_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
diff --git a/native/spark-expr/src/math_funcs/round.rs b/native/spark-expr/src/math_funcs/round.rs
index d2cbe4f967..d6302d9b7b 100644
--- a/native/spark-expr/src/math_funcs/round.rs
+++ b/native/spark-expr/src/math_funcs/round.rs
@@ -19,10 +19,13 @@ use crate::arithmetic_overflow_error;
 use crate::math_funcs::utils::{get_precision_scale, make_decimal_array, make_decimal_scalar};
 use arrow::array::{Array, ArrowNativeTypeOp};
 use arrow::array::{Int16Array, Int32Array, Int64Array, Int8Array};
-use arrow::datatypes::DataType;
+use arrow::datatypes::{DataType, Field};
 use arrow::error::ArrowError;
+use datafusion::common::config::ConfigOptions;
 use datafusion::common::{exec_err, internal_err, DataFusionError, ScalarValue};
-use datafusion::{functions::math::round::round, physical_plan::ColumnarValue};
+use datafusion::functions::math::round::RoundFunc;
+use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
+use datafusion::physical_plan::ColumnarValue;
 use std::{cmp::min, sync::Arc};
 
 macro_rules! integer_round {
@@ -126,10 +129,18 @@ pub fn spark_round(
             let (precision, scale) = get_precision_scale(data_type);
             make_decimal_array(array, precision, scale, &f)
         }
-        DataType::Float32 | DataType::Float64 => Ok(ColumnarValue::Array(round(&[
-            Arc::clone(array),
-            args[1].to_array(array.len())?,
-        ])?)),
+        DataType::Float32 | DataType::Float64 => {
+            let round_udf = RoundFunc::new();
+            let return_field = Arc::new(Field::new("round", array.data_type().clone(), true));
+            let args_for_round = ScalarFunctionArgs {
+                args: vec![ColumnarValue::Array(Arc::clone(array)), args[1].clone()],
+                number_rows: array.len(),
+                return_field,
+                arg_fields: vec![],
+                config_options: Arc::new(ConfigOptions::default()),
+            };
+            round_udf.invoke_with_args(args_for_round)
+        }
         dt => exec_err!("Not supported datatype for ROUND: {dt}"),
     },
     ColumnarValue::Scalar(a) => match a {
@@ -150,9 +161,19 @@ pub fn spark_round(
             let (precision, scale) = get_precision_scale(data_type);
             make_decimal_scalar(a, precision, scale, &f)
         }
-        ScalarValue::Float32(_) | ScalarValue::Float64(_) => Ok(ColumnarValue::Scalar(
-            ScalarValue::try_from_array(&round(&[a.to_array()?, args[1].to_array(1)?])?, 0)?,
-        )),
+        ScalarValue::Float32(_) | ScalarValue::Float64(_) => {
+            let round_udf = RoundFunc::new();
+            let data_type = a.data_type();
+            let return_field = Arc::new(Field::new("round", data_type, true));
+            let args_for_round = ScalarFunctionArgs {
+                args: vec![ColumnarValue::Scalar(a.clone()), args[1].clone()],
+                number_rows: 1,
+                return_field,
+                arg_fields: vec![],
+                config_options: Arc::new(ConfigOptions::default()),
+            };
+            round_udf.invoke_with_args(args_for_round)
+        }
         dt => exec_err!("Not supported datatype for ROUND: {dt}"),
     },
 }
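A standalone sketch of the `invoke_with_args` pattern this change adopts (mirrors the patch; not itself part of it, and the `ScalarFunctionArgs` field set is assumed to match this branch). It rounds a Float64 array to `scale` decimal places through DataFusion's built-in round UDF; round's second argument is an Int64 scale.

    use std::sync::Arc;

    use arrow::array::Float64Array;
    use arrow::datatypes::{DataType, Field};
    use datafusion::common::config::ConfigOptions;
    use datafusion::common::{Result, ScalarValue};
    use datafusion::functions::math::round::RoundFunc;
    use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
    use datafusion::physical_plan::ColumnarValue;

    fn round_f64(values: Float64Array, scale: i64) -> Result<ColumnarValue> {
        // Capture the row count before the array is moved into the Arc.
        let num_rows = values.len();
        let args = ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Array(Arc::new(values)),
                ColumnarValue::Scalar(ScalarValue::Int64(Some(scale))),
            ],
            number_rows: num_rows,
            return_field: Arc::new(Field::new("round", DataType::Float64, true)),
            arg_fields: vec![],
            config_options: Arc::new(ConfigOptions::default()),
        };
        RoundFunc::new().invoke_with_args(args)
    }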