From 9a161d2fd2d32e72bdf4e99459b3ff8cf9857c03 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 09:01:49 -0400 Subject: [PATCH 01/22] Pin to https://github.com/apache/arrow-rs/pull/7597 --- Cargo.lock | 63 ++++++++++++++++-------------------------------------- Cargo.toml | 16 ++++++++++++++ 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 102614a35ebb..564a5161b942 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,8 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-arith", "arrow-array", @@ -271,8 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -285,8 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -302,8 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "bytes", "half", @@ -313,8 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -334,8 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-cast", @@ -350,8 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-buffer", "arrow-schema", @@ -362,8 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91efc67a4f5a438833dd76ef674745c80f6f6b9a428a3b440cbfbf74e32867e6" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-arith", "arrow-array", @@ -389,8 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,8 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -425,8 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -438,8 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -451,8 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "bitflags 2.9.1", "serde", @@ -462,8 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -476,8 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -1063,7 +1048,7 @@ dependencies = [ "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -3732,15 +3717,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -4416,8 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" +source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4839,7 +4814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -4859,7 +4834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.101", diff --git a/Cargo.toml b/Cargo.toml index 767b66805fe4..e56e008cead8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -218,3 +218,19 @@ uninlined_format_args = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_qualifications = "deny" + + +# pin to https://github.com/apache/arrow-rs/pull/7597 +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } From 083931d6d62758f5d135d5872c0e67a459b88852 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 10:14:55 -0400 Subject: [PATCH 02/22] Update pin --- Cargo.lock | 36 ++++++++++++++++++------------------ Cargo.toml | 38 ++++++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 564a5161b942..a571f1b73cbf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ac051b7447d989fed95ad9a2815800b6ba1a8a3c#ac051b7447d989fed95ad9a2815800b6ba1a8a3c" +source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4814,7 +4814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -4834,7 +4834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.101", diff --git a/Cargo.toml b/Cargo.toml index e56e008cead8..de1013cbe931 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,15 +222,29 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ac051b7447d989fed95ad9a2815800b6ba1a8a3c" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } + +#arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } +#arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } +#arrow-buffer = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-buffer" } +#arrow-cast = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-cast" } +#arrow-data = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-data" } +#arrow-ipc = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ipc" } +#arrow-schema = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-schema" } +#arrow-select = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-select" } +#arrow-string = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-string" } +#arrow-ord = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ord" } +#arrow-flight = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-flight" } +#parquet = { path= "/Users/andrewlamb/Software/arrow-rs/parquet" } + From e79454f48929360b2753c51d9647f9a302c9e184 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 10:15:09 -0400 Subject: [PATCH 03/22] Use upstream BatchCoalescer --- datafusion/physical-plan/src/coalesce/mod.rs | 397 +++--------------- .../physical-plan/src/coalesce_batches.rs | 129 ++---- 2 files changed, 100 insertions(+), 426 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 0eca27f8e40e..9b010fbe9b97 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -15,76 +15,27 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ - builder::StringViewBuilder, cast::AsArray, Array, ArrayRef, RecordBatch, - RecordBatchOptions, -}; -use arrow::compute::concat_batches; +use arrow::array::RecordBatch; +use arrow::compute::BatchCoalescer; use arrow::datatypes::SchemaRef; -use std::sync::Arc; +use datafusion_common::{internal_err, Result}; -/// Concatenate multiple [`RecordBatch`]es -/// -/// `BatchCoalescer` concatenates multiple small [`RecordBatch`]es, produced by -/// operations such as `FilterExec` and `RepartitionExec`, into larger ones for -/// more efficient processing by subsequent operations. -/// -/// # Background -/// -/// Generally speaking, larger [`RecordBatch`]es are more efficient to process -/// than smaller record batches (until the CPU cache is exceeded) because there -/// is fixed processing overhead per batch. DataFusion tries to operate on -/// batches of `target_batch_size` rows to amortize this overhead -/// -/// ```text -/// ┌────────────────────┐ -/// │ RecordBatch │ -/// │ num_rows = 23 │ -/// └────────────────────┘ ┌────────────────────┐ -/// │ │ -/// ┌────────────────────┐ Coalesce │ │ -/// │ │ Batches │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ -/// │ │ │ RecordBatch │ -/// │ │ │ num_rows = 106 │ -/// └────────────────────┘ │ │ -/// │ │ -/// ┌────────────────────┐ │ │ -/// │ │ │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 33 │ └────────────────────┘ -/// │ │ -/// └────────────────────┘ -/// ``` -/// -/// # Notes: -/// -/// 1. Output rows are produced in the same order as the input rows -/// -/// 2. The output is a sequence of batches, with all but the last being at least -/// `target_batch_size` rows. -/// -/// 3. Eventually this may also be able to handle other optimizations such as a -/// combined filter/coalesce operation. +/// Concatenate multiple [`RecordBatch`]es and apply a limit /// +/// See [`BatchCoalescer`] for more details on how this works. #[derive(Debug)] -pub struct BatchCoalescer { - /// The input schema - schema: SchemaRef, - /// Minimum number of rows for coalesces batches - target_batch_size: usize, +pub struct LimitedBatchCoalescer { + /// The arrow structure that builds the output batches + inner: BatchCoalescer, /// Total number of rows returned so far total_rows: usize, - /// Buffered batches - buffer: Vec, - /// Buffered row count - buffered_rows: usize, /// Limit: maximum number of rows to fetch, `None` means fetch all rows fetch: Option, + /// Indicates if the coalescer is finished + finished: bool, } -impl BatchCoalescer { +impl LimitedBatchCoalescer { /// Create a new `BatchCoalescer` /// /// # Arguments @@ -98,187 +49,83 @@ impl BatchCoalescer { fetch: Option, ) -> Self { Self { - schema, - target_batch_size, + inner: BatchCoalescer::new(schema, target_batch_size), total_rows: 0, - buffer: vec![], - buffered_rows: 0, fetch, + finished: false, } } /// Return the schema of the output batches pub fn schema(&self) -> SchemaRef { - Arc::clone(&self.schema) + self.inner.schema() } - /// Push next batch, and returns [`CoalescerState`] indicating the current - /// state of the buffer. - pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { - let batch = gc_string_view_batch(&batch); - if self.limit_reached(&batch) { - CoalescerState::LimitReached - } else if self.target_reached(batch) { - CoalescerState::TargetReached - } else { - CoalescerState::Continue + /// Push next batch, and returns [`true`] indicating if the limit is hit + /// + /// If the limit is reached, the caller must call [`Self::finish()`] to + /// complete the buffered results as a batch and finish the query. + pub fn push_batch(&mut self, batch: RecordBatch) -> Result { + if self.finished { + return internal_err!( + "LimitedBatchCoalescer: cannot push batch after finish" + ); } - } - /// Return true if the there is no data buffered - pub fn is_empty(&self) -> bool { - self.buffer.is_empty() - } + // if we are at the limit, return LimitReached + if let Some(fetch) = self.fetch { + // limit previously reached + if self.total_rows >= fetch { + return Ok(true); + } - /// Checks if the buffer will reach the specified limit after getting - /// `batch`. - /// - /// If fetch would be exceeded, slices the received batch, updates the - /// buffer with it, and returns `true`. - /// - /// Otherwise: does nothing and returns `false`. - fn limit_reached(&mut self, batch: &RecordBatch) -> bool { - match self.fetch { - Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // limit now reached + if self.total_rows + batch.num_rows() >= fetch { // Limit is reached let remaining_rows = fetch - self.total_rows; debug_assert!(remaining_rows > 0); - let batch = batch.slice(0, remaining_rows); - self.buffered_rows += batch.num_rows(); - self.total_rows = fetch; - self.buffer.push(batch); - true + let batch_head = batch.slice(0, remaining_rows); + self.total_rows += batch_head.num_rows(); + self.inner.push_batch(batch_head)?; + return Ok(true); } - _ => false, } - } - /// Updates the buffer with the given batch. - /// - /// If the target batch size is reached, returns `true`. Otherwise, returns - /// `false`. - fn target_reached(&mut self, batch: RecordBatch) -> bool { - if batch.num_rows() == 0 { - false - } else { - self.total_rows += batch.num_rows(); - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - self.buffered_rows >= self.target_batch_size - } + self.total_rows += batch.num_rows(); + self.inner.push_batch(batch)?; + + Ok(false) // not at limit } - /// Concatenates and returns all buffered batches, and clears the buffer. - pub fn finish_batch(&mut self) -> datafusion_common::Result { - let batch = concat_batches(&self.schema, &self.buffer)?; - self.buffer.clear(); - self.buffered_rows = 0; - Ok(batch) + /// Return true if there is no data buffered + pub fn is_empty(&self) -> bool { + self.inner.is_empty() } -} -/// Indicates the state of the [`BatchCoalescer`] buffer after the -/// [`BatchCoalescer::push_batch()`] operation. -/// -/// The caller should take different actions, depending on the variant returned. -pub enum CoalescerState { - /// Neither the limit nor the target batch size is reached. - /// - /// Action: continue pushing batches. - Continue, - /// The limit has been reached. - /// - /// Action: call [`BatchCoalescer::finish_batch()`] to get the final - /// buffered results as a batch and finish the query. - LimitReached, - /// The specified minimum number of rows a batch should have is reached. + /// Complete the current buffered batch and finish the coalescer /// - /// Action: call [`BatchCoalescer::finish_batch()`] to get the current - /// buffered results as a batch and then continue pushing batches. - TargetReached, -} - -/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed -/// -/// Decides when to consolidate the StringView into a new buffer to reduce -/// memory usage and improve string locality for better performance. -/// -/// This differs from `StringViewArray::gc` because: -/// 1. It may not compact the array depending on a heuristic. -/// 2. It uses a precise block size to reduce the number of buffers to track. -/// -/// # Heuristic -/// -/// If the average size of each view is larger than 32 bytes, we compact the array. -/// -/// `StringViewArray` include pointers to buffer that hold the underlying data. -/// One of the great benefits of `StringViewArray` is that many operations -/// (e.g., `filter`) can be done without copying the underlying data. -/// -/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the -/// `StringViewArray` may only refer to a small portion of the buffer, -/// significantly increasing memory usage. -fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { - let new_columns: Vec = batch - .columns() - .iter() - .map(|c| { - // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. - let Some(s) = c.as_string_view_opt() else { - return Arc::clone(c); - }; - let ideal_buffer_size: usize = s - .views() - .iter() - .map(|v| { - let len = (*v as u32) as usize; - if len > 12 { - len - } else { - 0 - } - }) - .sum(); - let actual_buffer_size = s.get_buffer_memory_size(); - - // Re-creating the array copies data and can be time consuming. - // We only do it if the array is sparse - if actual_buffer_size > (ideal_buffer_size * 2) { - // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. - // See https://github.com/apache/arrow-rs/issues/6094 for more details. - let mut builder = StringViewBuilder::with_capacity(s.len()); - if ideal_buffer_size > 0 { - builder = builder.with_fixed_block_size(ideal_buffer_size as u32); - } - - for v in s.iter() { - builder.append_option(v); - } - - let gc_string = builder.finish(); - - debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 + /// Any subsequent calls to `push_batch()` will return an Err + pub fn finish(&mut self) -> Result<()> { + self.inner.finish_buffered_batch()?; + self.finished = true; + Ok(()) + } - Arc::new(gc_string) - } else { - Arc::clone(c) - } - }) - .collect(); - let mut options = RecordBatchOptions::new(); - options = options.with_row_count(Some(batch.num_rows())); - RecordBatch::try_new_with_options(batch.schema(), new_columns, &options) - .expect("Failed to re-create the gc'ed record batch") + /// Return the next completed batch, if any + pub fn next_completed_batch(&mut self) -> Option { + self.inner.next_completed_batch() + } } #[cfg(test)] mod tests { - use std::ops::Range; - use super::*; + use std::ops::Range; + use std::sync::Arc; - use arrow::array::{builder::ArrayBuilder, StringViewArray, UInt32Array}; + use arrow::array::UInt32Array; + use arrow::compute::concat_batches; use arrow::datatypes::{DataType, Field, Schema}; #[test] @@ -286,9 +133,9 @@ mod tests { let batch = uint32_batch(0..8); Test::new() .with_batches(std::iter::repeat_n(batch, 10)) - // expected output is batches of at least 20 rows (except for the final batch) + // expected output is batches of exactly 21 rows (except for the final batch) .with_target_batch_size(21) - .with_expected_output_sizes(vec![24, 24, 24, 8]) + .with_expected_output_sizes(vec![21, 21, 21, 17]) .run() } @@ -352,7 +199,7 @@ mod tests { .run() } - /// Test for [`BatchCoalescer`] + /// Test for [`LimitedBatchCoalescer`] /// /// Pushes the input batches to the coalescer and verifies that the resulting /// batches have the expected number of rows and contents. @@ -425,24 +272,18 @@ mod tests { let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); let mut coalescer = - BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); + LimitedBatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); let mut output_batches = vec![]; for batch in input_batches { - match coalescer.push_batch(batch) { - CoalescerState::Continue => {} - CoalescerState::LimitReached => { - output_batches.push(coalescer.finish_batch().unwrap()); - break; - } - CoalescerState::TargetReached => { - coalescer.buffered_rows = 0; - output_batches.push(coalescer.finish_batch().unwrap()); - } + if coalescer.push_batch(batch).unwrap() { + // at limit, finish the coalescer + break; } } - if coalescer.buffered_rows != 0 { - output_batches.extend(coalescer.buffer); + coalescer.finish().unwrap(); + while let Some(batch) = coalescer.next_completed_batch() { + output_batches.push(batch); } // make sure we got the expected number of output batches and content @@ -488,110 +329,6 @@ mod tests { .unwrap() } - #[test] - fn test_gc_string_view_batch_small_no_compact() { - // view with only short strings (no buffers) --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("a"), Some("b"), Some("c")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 0); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_test_batch_empty() { - let schema = Schema::empty(); - let batch = RecordBatch::new_empty(schema.into()); - let output_batch = gc_string_view_batch(&batch); - assert_eq!(batch.num_columns(), output_batch.num_columns()); - assert_eq!(batch.num_rows(), output_batch.num_rows()); - } - - #[test] - fn test_gc_string_view_batch_large_no_compact() { - // view with large strings (has buffers) but full --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("This string is longer than 12 bytes")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_slice_compact() { - // view with large strings (has buffers) and only partially used --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("this string is longer than 12 bytes")], - } - .build(); - - // slice only 11 rows, so most of the buffer is not used - let array = array.slice(11, 22); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer - } - - /// Compares the values of two string view arrays - fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { - assert_eq!(arr1.len(), arr2.len()); - for (s1, s2) in arr1.iter().zip(arr2.iter()) { - assert_eq!(s1, s2); - } - } - - /// runs garbage collection on string view array - /// and ensures the number of rows are the same - fn do_gc(array: StringViewArray) -> StringViewArray { - let batch = - RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); - let gc_batch = gc_string_view_batch(&batch); - assert_eq!(batch.num_rows(), gc_batch.num_rows()); - assert_eq!(batch.schema(), gc_batch.schema()); - gc_batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .clone() - } - - /// Describes parameters for creating a `StringViewArray` - struct StringViewTest { - /// The number of rows in the array - rows: usize, - /// The strings to use in the array (repeated over and over - strings: Vec>, - } - - impl StringViewTest { - /// Create a `StringViewArray` with the parameters specified in this struct - fn build(self) -> StringViewArray { - let mut builder = - StringViewBuilder::with_capacity(100).with_fixed_block_size(8192); - loop { - for &v in self.strings.iter() { - builder.append_option(v); - if builder.len() >= self.rows { - return builder.finish(); - } - } - } - } - } fn batch_to_pretty_strings(batch: &RecordBatch) -> String { arrow::util::pretty::pretty_format_batches(&[batch.clone()]) .unwrap() diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index f35231fb6a99..7f3cc3142966 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -34,7 +34,7 @@ use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalExpr; -use crate::coalesce::{BatchCoalescer, CoalescerState}; +use crate::coalesce::LimitedBatchCoalescer; use crate::execution_plan::CardinalityEffect; use crate::filter_pushdown::{ ChildPushdownResult, FilterDescription, FilterPushdownPropagation, @@ -52,7 +52,7 @@ use futures::stream::{Stream, StreamExt}; /// buffering and returns the final batch once the number of collected rows /// reaches the `fetch` value. /// -/// See [`BatchCoalescer`] for more information +/// See [`LimitedBatchCoalescer`] for more information #[derive(Debug, Clone)] pub struct CoalesceBatchesExec { /// The input plan @@ -181,14 +181,13 @@ impl ExecutionPlan for CoalesceBatchesExec { ) -> Result { Ok(Box::pin(CoalesceBatchesStream { input: self.input.execute(partition, context)?, - coalescer: BatchCoalescer::new( + coalescer: LimitedBatchCoalescer::new( self.input.schema(), self.target_batch_size, self.fetch, ), baseline_metrics: BaselineMetrics::new(&self.metrics, partition), - // Start by pulling data - inner_state: CoalesceBatchesStreamState::Pull, + completed: false, })) } @@ -252,12 +251,11 @@ struct CoalesceBatchesStream { /// The input plan input: SendableRecordBatchStream, /// Buffer for combining batches - coalescer: BatchCoalescer, + coalescer: LimitedBatchCoalescer, /// Execution metrics baseline_metrics: BaselineMetrics, - /// The current inner state of the stream. This state dictates the current - /// action or operation to be performed in the streaming process. - inner_state: CoalesceBatchesStreamState, + /// is the input stream exhausted or limit reached? + completed: bool, } impl Stream for CoalesceBatchesStream { @@ -277,50 +275,6 @@ impl Stream for CoalesceBatchesStream { } } -/// Enumeration of possible states for `CoalesceBatchesStream`. -/// It represents different stages in the lifecycle of a stream of record batches. -/// -/// An example of state transition: -/// Notation: -/// `[3000]`: A batch with size 3000 -/// `{[2000], [3000]}`: `CoalesceBatchStream`'s internal buffer with 2 batches buffered -/// Input of `CoalesceBatchStream` will generate three batches `[2000], [3000], [4000]` -/// The coalescing procedure will go through the following steps with 4096 coalescing threshold: -/// 1. Read the first batch and get it buffered. -/// - initial state: `Pull` -/// - initial buffer: `{}` -/// - updated buffer: `{[2000]}` -/// - next state: `Pull` -/// 2. Read the second batch, the coalescing target is reached since 2000 + 3000 > 4096 -/// - initial state: `Pull` -/// - initial buffer: `{[2000]}` -/// - updated buffer: `{[2000], [3000]}` -/// - next state: `ReturnBuffer` -/// 4. Two batches in the batch get merged and consumed by the upstream operator. -/// - initial state: `ReturnBuffer` -/// - initial buffer: `{[2000], [3000]}` -/// - updated buffer: `{}` -/// - next state: `Pull` -/// 5. Read the third input batch. -/// - initial state: `Pull` -/// - initial buffer: `{}` -/// - updated buffer: `{[4000]}` -/// - next state: `Pull` -/// 5. The input is ended now. Jump to exhaustion state preparing the finalized data. -/// - initial state: `Pull` -/// - initial buffer: `{[4000]}` -/// - updated buffer: `{[4000]}` -/// - next state: `Exhausted` -#[derive(Debug, Clone, Eq, PartialEq)] -enum CoalesceBatchesStreamState { - /// State to pull a new batch from the input stream. - Pull, - /// State to return a buffered batch. - ReturnBuffer, - /// State indicating that the stream is exhausted. - Exhausted, -} - impl CoalesceBatchesStream { fn poll_next_inner( self: &mut Pin<&mut Self>, @@ -328,51 +282,34 @@ impl CoalesceBatchesStream { ) -> Poll>> { let cloned_time = self.baseline_metrics.elapsed_compute().clone(); loop { - match &self.inner_state { - CoalesceBatchesStreamState::Pull => { - // Attempt to pull the next batch from the input stream. - let input_batch = ready!(self.input.poll_next_unpin(cx)); - // Start timing the operation. The timer records time upon being dropped. - let _timer = cloned_time.timer(); - - match input_batch { - Some(Ok(batch)) => match self.coalescer.push_batch(batch) { - CoalescerState::Continue => {} - CoalescerState::LimitReached => { - self.inner_state = CoalesceBatchesStreamState::Exhausted; - } - CoalescerState::TargetReached => { - self.inner_state = - CoalesceBatchesStreamState::ReturnBuffer; - } - }, - None => { - // End of input stream, but buffered batches might still be present. - self.inner_state = CoalesceBatchesStreamState::Exhausted; - } - other => return Poll::Ready(other), - } - } - CoalesceBatchesStreamState::ReturnBuffer => { - let _timer = cloned_time.timer(); - // Combine buffered batches into one batch and return it. - let batch = self.coalescer.finish_batch()?; - // Set to pull state for the next iteration. - self.inner_state = CoalesceBatchesStreamState::Pull; - return Poll::Ready(Some(Ok(batch))); + // If there is any completed batch ready, return it + if let Some(batch) = self.coalescer.next_completed_batch() { + return Poll::Ready(Some(Ok(batch))); + } + if self.completed { + // If input is done and no batches are ready, return None to signal end of stream. + return Poll::Ready(None); + } + // Attempt to pull the next batch from the input stream. + let input_batch = ready!(self.input.poll_next_unpin(cx)); + // Start timing the operation. The timer records time upon being dropped. + let _timer = cloned_time.timer(); + + match input_batch { + None => { + // Input stream is exhausted, finalize any remaining batches + self.completed = true; + self.coalescer.finish()?; } - CoalesceBatchesStreamState::Exhausted => { - // Handle the end of the input stream. - return if self.coalescer.is_empty() { - // If buffer is empty, return None indicating the stream is fully consumed. - Poll::Ready(None) - } else { - let _timer = cloned_time.timer(); - // If the buffer still contains batches, prepare to return them. - let batch = self.coalescer.finish_batch()?; - Poll::Ready(Some(Ok(batch))) - }; + Some(Ok(batch)) => { + if self.coalescer.push_batch(batch)? { + // limit was reached, so stop early + self.completed = true; + self.coalescer.finish()?; + } } + // Error case + other => return Poll::Ready(other), } } } From 4e8e1ce6d795e62d963283f259d069cf0ababd69 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 11:38:52 -0400 Subject: [PATCH 04/22] Update the pin --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 24 ++++++++++++------------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a571f1b73cbf..27d35c75521a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=313ed723b2c6fadfcf4d6dfd7894adc4144135c9#313ed723b2c6fadfcf4d6dfd7894adc4144135c9" +source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index de1013cbe931..7594fb684446 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,18 +222,18 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "313ed723b2c6fadfcf4d6dfd7894adc4144135c9" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From 9e20973387ae4b04914917726c834120e592238d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Jun 2025 11:41:29 -0400 Subject: [PATCH 05/22] Update tests --- datafusion/core/src/datasource/listing/table.rs | 2 +- datafusion/physical-plan/src/coalesce/mod.rs | 14 +++++++++++--- datafusion/sqllogictest/test_files/joins.slt | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3c87d3ee2329..d7a5625d8698 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -2178,7 +2178,7 @@ mod tests { ParquetFormat::default().get_ext(), FileCompressionType::UNCOMPRESSED, Some(config_map), - 2, + 4, ) .await?; Ok(()) diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 9b010fbe9b97..feaa2b567350 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -148,7 +148,7 @@ mod tests { // expected to behave the same as `test_concat_batches` .with_target_batch_size(21) .with_fetch(Some(100)) - .with_expected_output_sizes(vec![24, 24, 24, 8]) + .with_expected_output_sizes(vec![21, 21, 21, 17]) .run(); } @@ -160,7 +160,7 @@ mod tests { // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 .with_target_batch_size(21) .with_fetch(Some(50)) - .with_expected_output_sizes(vec![24, 24, 2]) + .with_expected_output_sizes(vec![21, 21, 8]) .run(); } @@ -170,7 +170,7 @@ mod tests { Test::new() .with_batches(std::iter::repeat_n(batch, 10)) // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 - .with_target_batch_size(21) + .with_target_batch_size(24) .with_fetch(Some(48)) .with_expected_output_sizes(vec![24, 24]) .run(); @@ -286,6 +286,14 @@ mod tests { output_batches.push(batch); } + let actual_output_sizes: Vec = + output_batches.iter().map(|b| b.num_rows()).collect(); + assert_eq!( + expected_output_sizes, actual_output_sizes, + "Unexpected number of rows in output batches\n\ + Expected\n{expected_output_sizes:#?}\nActual:{actual_output_sizes:#?}" + ); + // make sure we got the expected number of output batches and content let mut starting_idx = 0; assert_eq!(expected_output_sizes.len(), output_batches.len()); diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index ccecb9494331..3089f2e2d16a 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -2107,9 +2107,9 @@ RIGHT JOIN (select t2_id from join_t2 where join_t2.t2_id > 11) as join_t2 ON join_t1.t1_id < join_t2.t2_id ---- 33 44 +NULL 22 33 55 44 55 -NULL 22 ##### # Configuration teardown From 8918b3ccb025fa741f6248a74b50ee9f13eda0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 12:08:13 +0200 Subject: [PATCH 06/22] Update rev --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 24 ++++++++++++------------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27d35c75521a..9156ada645f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=363a747dbce79896f37bc2807eb6446d3f5067b0#363a747dbce79896f37bc2807eb6446d3f5067b0" +source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 7594fb684446..d5aee519156a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,18 +222,18 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "363a747dbce79896f37bc2807eb6446d3f5067b0" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From 49cb62e7c46d4801569f5451dc8c94b108071ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 12:21:06 +0200 Subject: [PATCH 07/22] Update rev --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 24 ++++++++++++------------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9156ada645f6..1295e0fe97f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=7739a83fe007455dc88e2d926f11d9019905c6a6#7739a83fe007455dc88e2d926f11d9019905c6a6" +source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index d5aee519156a..6ccea15e6b41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,18 +222,18 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "7739a83fe007455dc88e2d926f11d9019905c6a6" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From 140ee9c69aa14bf2071566d3e95d555d16d1945a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 19:17:13 +0200 Subject: [PATCH 08/22] New rev --- Cargo.lock | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 +++++++++++++ 2 files changed, 79 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 1295e0fe97f6..e74d1eb9bfcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,11 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +274,11 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +291,11 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +311,11 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "bytes", "half", @@ -309,7 +325,11 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +349,11 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +368,11 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +383,11 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +413,11 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +430,11 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +455,11 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +471,11 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +487,11 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +501,11 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +518,11 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4452,11 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" +<<<<<<< Updated upstream source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" +======= +source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +>>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 6ccea15e6b41..030145ecf56a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,6 +222,7 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] +<<<<<<< Updated upstream arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } @@ -234,6 +235,20 @@ arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a3 arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } +======= +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +>>>>>>> Stashed changes #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From 5d5683c2ed4a3f3856403588e1d3e9d40bd86b52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 19:19:22 +0200 Subject: [PATCH 09/22] New rev --- Cargo.toml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 030145ecf56a..5b80e67f9ce7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,20 +222,6 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] -<<<<<<< Updated upstream -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "95c7b2a36aac01c8d398f6b8a072b20429f391db" } -======= arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } @@ -248,7 +234,6 @@ arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0 arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } ->>>>>>> Stashed changes #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From f79dd0950c170fa856dc4bf60345af547324d6ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 19:21:51 +0200 Subject: [PATCH 10/22] New rev --- Cargo.lock | 64 ------------------------------------------------------ 1 file changed, 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e74d1eb9bfcf..6392e11ceafa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,11 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-arith", "arrow-array", @@ -274,11 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -291,11 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -311,11 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "bytes", "half", @@ -325,11 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -349,11 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-cast", @@ -368,11 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-buffer", "arrow-schema", @@ -383,11 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-arith", "arrow-array", @@ -413,11 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -430,11 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -455,11 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -471,11 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -487,11 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "bitflags 2.9.1", "serde", @@ -501,11 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-array", @@ -518,11 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "arrow-array", "arrow-buffer", @@ -4452,11 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -<<<<<<< Updated upstream -source = "git+https://github.com/apache/arrow-rs.git?rev=95c7b2a36aac01c8d398f6b8a072b20429f391db#95c7b2a36aac01c8d398f6b8a072b20429f391db" -======= source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" ->>>>>>> Stashed changes dependencies = [ "ahash 0.8.12", "arrow-array", From 1c44c5cbcc482211bffa864f08e02a44ae218573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 7 Jun 2025 19:33:43 +0200 Subject: [PATCH 11/22] cargo fmt --- Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5b80e67f9ce7..ebda1c7d606e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,7 +219,6 @@ uninlined_format_args = "warn" unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_qualifications = "deny" - # pin to https://github.com/apache/arrow-rs/pull/7597 [patch.crates-io] arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } @@ -247,4 +246,3 @@ parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a1 #arrow-ord = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ord" } #arrow-flight = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-flight" } #parquet = { path= "/Users/andrewlamb/Software/arrow-rs/parquet" } - From ea8b7000d7d804c532cd57b84bdf98c332904503 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 8 Jun 2025 09:49:26 -0400 Subject: [PATCH 12/22] update pin --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6392e11ceafa..a18d220fbcac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-cast", @@ -344,7 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "arrow-array", "arrow-buffer", @@ -4392,7 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=092914f0be3a11d16417a02ef8b031e87a52df0c#092914f0be3a11d16417a02ef8b031e87a52df0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index ebda1c7d606e..a15100b0033f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,20 +219,20 @@ uninlined_format_args = "warn" unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_qualifications = "deny" -# pin to https://github.com/apache/arrow-rs/pull/7597 +# pin to pre-release arrow [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "092914f0be3a11d16417a02ef8b031e87a52df0c" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From 1c615131188675ac7f3a4e9c6e4c192f0f442f12 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 17 Jun 2025 12:17:20 -0400 Subject: [PATCH 13/22] Temp pin to https://github.com/apache/arrow-rs/pull/7650 --- Cargo.lock | 33 ++++++++++++++++----------------- Cargo.toml | 40 ++++++++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f95dcd70a5f3..184a2027cb32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-cast", @@ -337,14 +337,13 @@ dependencies = [ "chrono", "csv", "csv-core", - "lazy_static", "regex", ] [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-buffer", "arrow-schema", @@ -355,7 +354,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-arith", "arrow-array", @@ -381,7 +380,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,7 +393,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -415,7 +414,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -427,7 +426,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +438,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "bitflags 2.9.1", "serde", @@ -449,7 +448,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -462,7 +461,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "arrow-array", "arrow-buffer", @@ -4397,7 +4396,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=52d8d568f4ddd9069b743fc0eab17ffe102da35b#52d8d568f4ddd9069b743fc0eab17ffe102da35b" +source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 587d05cee867..6068b91e474f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -221,19 +221,35 @@ unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)", "cfg(tarpauli unused_qualifications = "deny" # pin to pre-release arrow +#patch.crates-io] +#arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +#parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } + +# pin to https://github.com/apache/arrow-rs/pull/7650 [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } +arrow = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-data = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-select = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-string = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +parquet = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } + #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } #arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } From ed31ce17d8743c3decd2dfbcb1e4bdce36d8132b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 17 Jun 2025 12:18:45 -0400 Subject: [PATCH 14/22] Update plans for smaller parquet files --- datafusion/sqllogictest/test_files/explain_tree.slt | 8 ++++---- datafusion/sqllogictest/test_files/limit.slt | 2 +- datafusion/sqllogictest/test_files/repartition_scan.slt | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 8096c8cacf4c..d8c5d8191d29 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -280,7 +280,7 @@ physical_plan 06)┌─────────────┴─────────────┐ 07)│ DataSourceExec │ 08)│ -------------------- │ -09)│ bytes: 1072 │ +09)│ bytes: 1040 │ 10)│ format: memory │ 11)│ rows: 2 │ 12)└───────────────────────────┘ @@ -367,7 +367,7 @@ physical_plan 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 22)│ DataSourceExec ││ CoalesceBatchesExec │ 23)│ -------------------- ││ -------------------- │ -24)│ bytes: 536 ││ target_batch_size: │ +24)│ bytes: 520 ││ target_batch_size: │ 25)│ format: memory ││ 8192 │ 26)│ rows: 1 ││ │ 27)└───────────────────────────┘└─────────────┬─────────────┘ @@ -669,7 +669,7 @@ physical_plan 13)┌─────────────┴─────────────┐ 14)│ DataSourceExec │ 15)│ -------------------- │ -16)│ bytes: 536 │ +16)│ bytes: 520 │ 17)│ format: memory │ 18)│ rows: 1 │ 19)└───────────────────────────┘ @@ -1065,7 +1065,7 @@ physical_plan 13)┌─────────────┴─────────────┐ 14)│ DataSourceExec │ 15)│ -------------------- │ -16)│ bytes: 536 │ +16)│ bytes: 520 │ 17)│ format: memory │ 18)│ rows: 1 │ 19)└───────────────────────────┘ diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 9d5106bf2caf..8750f52978b0 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -854,7 +854,7 @@ physical_plan 01)ProjectionExec: expr=[1 as foo] 02)--SortPreservingMergeExec: [part_key@0 ASC NULLS LAST], fetch=1 03)----SortExec: TopK(fetch=1), expr=[part_key@0 ASC NULLS LAST], preserve_partitioning=[true] -04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet:0..794]]}, projection=[part_key], file_type=parquet +04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet:0..826], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet:0..826], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet:0..826]]}, projection=[part_key], file_type=parquet query I with selection as ( diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 2b30de572c8c..0b851f917855 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -61,7 +61,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..137], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:137..274], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:274..411], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:411..547]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] +03)----DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..141], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:141..282], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:282..423], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:423..563]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # disable round robin repartitioning statement ok @@ -77,7 +77,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..137], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:137..274], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:274..411], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:411..547]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] +03)----DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..141], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:141..282], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:282..423], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:423..563]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # enable round robin repartitioning again statement ok @@ -102,7 +102,7 @@ physical_plan 02)--SortExec: expr=[column1@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----CoalesceBatchesExec: target_batch_size=8192 04)------FilterExec: column1@0 != 42 -05)--------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..272], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:272..538, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..6], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:6..278], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:278..547]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] +05)--------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..280], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:280..554, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..6], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:6..286], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:286..563]]}, projection=[column1], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] ## Read the files as though they are ordered @@ -138,7 +138,7 @@ physical_plan 01)SortPreservingMergeExec: [column1@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: column1@0 != 42 -04)------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..269], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..273], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:273..547], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:269..538]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] +04)------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..277], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:281..563], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:277..554]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # Cleanup statement ok From c5bb25ecd54f1d026a603f615af2f3ae730003c1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 19 Jun 2025 07:33:16 -0400 Subject: [PATCH 15/22] update pin --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 24 ++++++++++++------------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e41069a293ca..97cd3e0eba10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,7 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-arith", "arrow-array", @@ -270,7 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,7 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "bytes", "half", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,7 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-cast", @@ -343,7 +343,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-buffer", "arrow-schema", @@ -354,7 +354,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-arith", "arrow-array", @@ -380,7 +380,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -393,7 +393,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -414,7 +414,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -426,7 +426,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -438,7 +438,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "bitflags 2.9.1", "serde", @@ -448,7 +448,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "arrow-array", "arrow-buffer", @@ -4411,7 +4411,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=8d4fad51706144034eacc08090865771e9a1426c#8d4fad51706144034eacc08090865771e9a1426c" +source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" dependencies = [ "ahash 0.8.12", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 40e20b33bc79..311672a272f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -237,18 +237,18 @@ unused_qualifications = "deny" # pin to https://github.com/apache/arrow-rs/pull/7650 [patch.crates-io] -arrow = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-array = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-data = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-select = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-string = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } -parquet = { git = "https://github.com/alamb/arrow-rs.git", rev = "8d4fad51706144034eacc08090865771e9a1426c" } +arrow = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-data = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-select = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-string = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } +parquet = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } #arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } From fe7e6a38a5bcb459fa33b4f45a8ba44160a177e4 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sat, 9 Aug 2025 10:31:17 +0800 Subject: [PATCH 16/22] fix test --- Cargo.lock | 661 +++++++++--------- Cargo.toml | 46 +- .../sqllogictest/test_files/explain_tree.slt | 387 +++++----- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/limit.slt | 4 - .../test_files/repartition_scan.slt | 4 - 6 files changed, 484 insertions(+), 620 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5acdd3c585b..c06d5e6cfc2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,9 +61,9 @@ dependencies = [ [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "adler32" @@ -149,9 +149,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -164,36 +164,36 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", - "once_cell", + "once_cell_polyfill", "windows-sys 0.59.0", ] @@ -265,7 +265,7 @@ dependencies = [ "arrow-select", "arrow-string", "half", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -295,7 +295,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "num", ] @@ -595,15 +595,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.4" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483020b893cdef3d89637e428d588650c71cfae7ea2e6ecbaee4de4ff99fb2dd" +checksum = "c0baa720ebadea158c5bda642ac444a2af0cdf7bb66b46d1e4533de5d1f449d0" dependencies = [ "aws-credential-types", "aws-runtime", @@ -643,9 +643,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.13.1" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fcc8f365936c834db5514fc45aee5b1202d677e6b40e48468aaaa8183ca8c7" +checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" dependencies = [ "aws-lc-sys", "zeroize", @@ -653,9 +653,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61b1d86e7705efe1be1b569bab41d4fa1e14e220b60a160f78de2db687add079" +checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" dependencies = [ "bindgen", "cc", @@ -666,9 +666,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.10" +version = "1.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +checksum = "b2090e664216c78e766b6bac10fe74d2f451c02441d43484cd76ac9a295075f7" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -690,9 +690,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.79.0" +version = "1.78.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a847168f15b46329fa32c7aca4e4f1a2e072f9b422f0adb19756f2e1457f111" +checksum = "dbd7bc4bd34303733bded362c4c997a39130eac4310257c79aae8484b1c4b724" dependencies = [ "aws-credential-types", "aws-runtime", @@ -712,9 +712,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.80.0" +version = "1.79.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b654dd24d65568738593e8239aef279a86a15374ec926ae8714e2d7245f34149" +checksum = "77358d25f781bb106c1a69531231d4fd12c6be904edb0c47198c604df5a2dbca" dependencies = [ "aws-credential-types", "aws-runtime", @@ -734,9 +734,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.81.0" +version = "1.80.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c92ea8a7602321c83615c82b408820ad54280fb026e92de0eeea937342fafa24" +checksum = "06e3ed2a9b828ae7763ddaed41d51724d2661a50c45f845b08967e52f4939cfc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -757,9 +757,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.4" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "ddfb9021f581b71870a17eac25b52335b82211cdc092e02b6876b2bcefa61666" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -790,9 +790,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.3" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "43c82ba4cab184ea61f6edaafc1072aad3c2a17dcf4c0fce19ac5694b90d8b5f" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -861,9 +861,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.8.6" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e107ce0783019dbff59b3a244aa0c114e4a8c9d93498af9162608cd5474e796" +checksum = "660f70d9d8af6876b4c9aa8dcb0dbaf0f89b04ee9a4455bea1b4ba03b15f26f6" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -1227,9 +1227,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytecheck" @@ -1321,9 +1321,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.23" +version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ "jobserver", "libc", @@ -1341,9 +1341,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "cfg_aliases" @@ -1411,7 +1411,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", - "libloading 0.8.7", + "libloading 0.8.8", ] [[package]] @@ -1427,9 +1427,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.43" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f" +checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" dependencies = [ "clap_builder", "clap_derive", @@ -1437,9 +1437,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.43" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65" +checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" dependencies = [ "anstream", "anstyle", @@ -1461,15 +1461,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "clipboard-win" -version = "5.4.0" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" dependencies = [ "error-code", ] @@ -1485,9 +1485,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" @@ -1496,7 +1496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ "unicode-segmentation", - "unicode-width 0.2.0", + "unicode-width 0.2.1", ] [[package]] @@ -1520,7 +1520,7 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width 0.2.0", + "unicode-width 0.2.1", "windows-sys 0.60.2", ] @@ -1556,9 +1556,9 @@ dependencies = [ [[package]] name = "const_panic" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2459fc9262a1aa204eb4b5764ad4f189caec88aea9634389c0a25f8be7f6265e" +checksum = "b98d1483e98c9d67f341ab4b3915cfdc54740bd6f5cccc9226ee0535d86aa8fb" [[package]] name = "constant_time_eq" @@ -1568,9 +1568,9 @@ checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" dependencies = [ "core-foundation-sys", "libc", @@ -1617,9 +1617,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -1633,7 +1633,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.43", + "clap 4.5.42", "criterion-plot", "futures", "is-terminal", @@ -1698,9 +1698,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" @@ -1865,7 +1865,7 @@ dependencies = [ "parking_lot", "parquet", "paste", - "rand 0.9.1", + "rand 0.9.2", "rand_distr", "regex", "rstest", @@ -1897,7 +1897,7 @@ dependencies = [ "mimalloc", "object_store", "parquet", - "rand 0.9.1", + "rand 0.9.2", "regex", "serde", "serde_json", @@ -1962,7 +1962,7 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", - "clap 4.5.43", + "clap 4.5.42", "ctor", "datafusion", "dirs", @@ -2006,7 +2006,7 @@ dependencies = [ "parquet", "paste", "pyo3", - "rand 0.9.1", + "rand 0.9.2", "recursive", "sqlparser", "tokio", @@ -2048,7 +2048,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand 0.9.1", + "rand 0.9.2", "tempfile", "tokio", "tokio-util", @@ -2156,7 +2156,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.1", + "rand 0.9.2", "tokio", ] @@ -2209,7 +2209,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.1", + "rand 0.9.2", "tempfile", "url", ] @@ -2290,7 +2290,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand 0.9.1", + "rand 0.9.2", "regex", "sha2", "tokio", @@ -2316,7 +2316,7 @@ dependencies = [ "half", "log", "paste", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -2329,7 +2329,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -2351,7 +2351,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -2449,7 +2449,7 @@ dependencies = [ "log", "paste", "petgraph 0.8.2", - "rand 0.9.1", + "rand 0.9.2", "rstest", ] @@ -2515,7 +2515,7 @@ dependencies = [ "log", "parking_lot", "pin-project-lite", - "rand 0.9.1", + "rand 0.9.2", "rstest", "rstest_reuse", "tempfile", @@ -2614,7 +2614,7 @@ dependencies = [ "datafusion-functions", "datafusion-macros", "log", - "rand 0.9.1", + "rand 0.9.2", "sha1", "url", "xxhash-rust", @@ -2653,7 +2653,7 @@ dependencies = [ "bigdecimal", "bytes", "chrono", - "clap 4.5.43", + "clap 4.5.42", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2770,7 +2770,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2824,9 +2824,9 @@ checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] name = "dyn-clone" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "educe" @@ -2909,12 +2909,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2959,8 +2959,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix 1.0.7", - "windows-sys 0.59.0", + "rustix 1.0.8", + "windows-sys 0.52.0", ] [[package]] @@ -3064,9 +3064,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "3.1.0" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f89bda4c2a21204059a977ed3bfe746677dfd137b83c339e702b0ac91d482aa" +checksum = "88d7be93788013f265201256d58f04936a8079ad5dc898743aa20525f503b683" dependencies = [ "autocfg", ] @@ -3222,7 +3222,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -3267,9 +3267,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5" +checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" dependencies = [ "atomic-waker", "bytes", @@ -3316,9 +3316,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.3" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ "allocator-api2", "equivalent", @@ -3342,9 +3342,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hex" @@ -3482,11 +3482,10 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.5" +version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "futures-util", "http 1.3.1", "hyper", "hyper-util", @@ -3513,19 +3512,23 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.12" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" dependencies = [ + "base64 0.22.1", "bytes", "futures-channel", + "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.1", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", - "socket2 0.5.9", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3701,7 +3704,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "serde", ] @@ -3713,7 +3716,7 @@ checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" dependencies = [ "console 0.16.0", "portable-atomic", - "unicode-width 0.2.0", + "unicode-width 0.2.1", "unit-prefix", "web-time", ] @@ -3773,6 +3776,16 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "iri-string" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is-terminal" version = "0.4.16" @@ -3781,7 +3794,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -3834,9 +3847,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a194df1107f33c79f4f93d02c80798520551949d59dfad22b6157048a88cca93" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" dependencies = [ "jiff-static", "log", @@ -3847,9 +3860,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", @@ -4000,12 +4013,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.53.3", ] [[package]] @@ -4027,13 +4040,13 @@ dependencies = [ [[package]] name = "libredox" -version = "0.1.3" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ "bitflags 2.9.1", "libc", - "redox_syscall 0.5.12", + "redox_syscall 0.5.17", ] [[package]] @@ -4044,7 +4057,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.43", + "clap 4.5.42", "escape8259", ] @@ -4077,9 +4090,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ "autocfg", "scopeguard", @@ -4099,11 +4112,11 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" dependencies = [ - "twox-hash 1.6.3", + "twox-hash", ] [[package]] @@ -4135,9 +4148,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "memoffset" @@ -4181,22 +4194,22 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] [[package]] name = "mio" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", ] [[package]] @@ -4391,7 +4404,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand 0.9.1", + "rand 0.9.2", "reqwest", "ring", "rustls-pemfile", @@ -4413,6 +4426,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "oorandom" version = "11.1.5" @@ -4454,15 +4473,15 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "owo-colors" -version = "4.2.1" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26995317201fa17f3656c36716aed4a7c81743a9634ac4c99c0eeda495db0cec" +checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ "lock_api", "parking_lot_core", @@ -4470,13 +4489,13 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.12", + "redox_syscall 0.5.17", "smallvec", "windows-targets 0.52.6", ] @@ -4502,7 +4521,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "lz4_flex", "num", "num-bigint", @@ -4514,7 +4533,7 @@ dependencies = [ "snap", "thrift", "tokio", - "twox-hash 2.1.0", + "twox-hash", "zstd", ] @@ -4609,7 +4628,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" dependencies = [ "fixedbitset", - "hashbrown 0.15.3", + "hashbrown 0.15.4", "indexmap 2.10.0", "serde", ] @@ -4718,9 +4737,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "portable-atomic-util" @@ -4756,7 +4775,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand 0.9.1", + "rand 0.9.2", "sha2", "stringprep", ] @@ -4840,9 +4859,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.32" +version = "0.2.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" dependencies = [ "proc-macro2", "syn 2.0.104", @@ -5050,9 +5069,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.38.1" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" +checksum = "8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b" dependencies = [ "memchr", "serde", @@ -5071,7 +5090,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls", - "socket2 0.5.9", + "socket2 0.5.10", "thiserror 2.0.12", "tokio", "tracing", @@ -5087,7 +5106,7 @@ dependencies = [ "bytes", "getrandom 0.3.3", "lru-slab", - "rand 0.9.1", + "rand 0.9.2", "ring", "rustc-hash 2.1.1", "rustls", @@ -5101,16 +5120,16 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" +checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.9", + "socket2 0.5.10", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -5124,9 +5143,9 @@ dependencies = [ [[package]] name = "r-efi" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "radium" @@ -5157,9 +5176,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", @@ -5210,7 +5229,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -5264,24 +5283,44 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.12" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ "bitflags 2.9.1", ] [[package]] name = "redox_users" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +checksum = "78eaea1f52c56d57821be178b2d47e09ff26481a6042e8e042fcb0ced068b470" dependencies = [ "getrandom 0.2.16", "libredox", "thiserror 2.0.12", ] +[[package]] +name = "ref-cast" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "regex" version = "1.11.1" @@ -5319,11 +5358,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ef7fa9ed0256d64a688a3747d0fef7a88851c18a5e1d57f115f38ec2e09366" +checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" dependencies = [ - "hashbrown 0.15.3", + "hashbrown 0.15.4", "memchr", ] @@ -5353,9 +5392,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.15" +version = "0.12.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" +checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" dependencies = [ "base64 0.22.1", "bytes", @@ -5368,17 +5407,13 @@ dependencies = [ "hyper", "hyper-rustls", "hyper-util", - "ipnet", "js-sys", "log", - "mime", - "once_cell", "percent-encoding", "pin-project-lite", "quinn", "rustls", "rustls-native-certs", - "rustls-pemfile", "rustls-pki-types", "serde", "serde_json", @@ -5388,13 +5423,13 @@ dependencies = [ "tokio-rustls", "tokio-util", "tower", + "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "windows-registry", ] [[package]] @@ -5506,9 +5541,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" [[package]] name = "rustc-hash" @@ -5541,27 +5576,27 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] name = "rustix" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ "bitflags 2.9.1", "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "rustls" -version = "0.23.27" +version = "0.23.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" +checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" dependencies = [ "aws-lc-rs", "once_cell", @@ -5605,9 +5640,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.3" +version = "0.103.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" dependencies = [ "aws-lc-rs", "ring", @@ -5617,9 +5652,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "rustyline" @@ -5638,7 +5673,7 @@ dependencies = [ "nix", "radix_trie", "unicode-segmentation", - "unicode-width 0.2.0", + "unicode-width 0.2.1", "utf8parse", "windows-sys 0.59.0", ] @@ -5679,6 +5714,30 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + [[package]] name = "schemars_derive" version = "0.8.22" @@ -5783,9 +5842,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" dependencies = [ "itoa", "memchr", @@ -5830,15 +5889,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.12.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6b6f7f2fcb69f747921f79f3926bd1e203fce4fef62c268dd3abfb6d86029aa" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", "indexmap 2.10.0", + "schemars 0.9.0", + "schemars 1.0.4", "serde", "serde_derive", "serde_json", @@ -5848,9 +5909,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.12.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d00caa5193a3c8362ac2b73be6b9e768aa5a4b2f721d8f4b339600c3cb51f8e" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ "darling", "proc-macro2", @@ -5937,18 +5998,15 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "snap" @@ -5976,9 +6034,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ "libc", "windows-sys 0.52.0", @@ -6057,7 +6115,7 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -6175,7 +6233,7 @@ dependencies = [ "prost-types", "protobuf-src", "regress", - "schemars", + "schemars 0.8.22", "semver", "serde", "serde_json", @@ -6268,8 +6326,8 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.0.7", - "windows-sys 0.59.0", + "rustix 1.0.8", + "windows-sys 0.52.0", ] [[package]] @@ -6286,7 +6344,7 @@ dependencies = [ "chrono-tz", "datafusion-common", "env_logger", - "rand 0.9.1", + "rand 0.9.2", ] [[package]] @@ -6378,12 +6436,11 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -6522,8 +6579,8 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.9.1", - "socket2 0.5.9", + "rand 0.9.2", + "socket2 0.5.10", "tokio", "tokio-util", "whoami", @@ -6580,15 +6637,15 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" [[package]] name = "toml_edit" -version = "0.22.26" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap 2.10.0", "toml_datetime", @@ -6615,7 +6672,7 @@ dependencies = [ "percent-encoding", "pin-project", "prost", - "socket2 0.5.9", + "socket2 0.5.10", "tokio", "tokio-stream", "tower", @@ -6643,6 +6700,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +dependencies = [ + "bitflags 2.9.1", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -6668,9 +6743,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", @@ -6679,9 +6754,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", "valuable", @@ -6735,19 +6810,9 @@ checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" [[package]] name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - -[[package]] -name = "twox-hash" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" +checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" [[package]] name = "typed-arena" @@ -6783,9 +6848,9 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "typify" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc5bec3cdff70fd542e579aa2e52967833e543a25fae0d14579043d2e868a50" +checksum = "6c6c647a34e851cf0260ccc14687f17cdcb8302ff1a8a687a24b97ca0f82406f" dependencies = [ "typify-impl", "typify-macro", @@ -6793,16 +6858,16 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b52a67305054e1da6f3d99ad94875dcd0c7c49adbd17b4b64f0eefb7ae5bf8ab" +checksum = "741b7f1e2e1338c0bee5ad5a7d3a9bbd4e24c33765c08b7691810e68d879365d" dependencies = [ "heck 0.5.0", "log", "proc-macro2", "quote", "regress", - "schemars", + "schemars 0.8.22", "semver", "serde", "serde_json", @@ -6813,13 +6878,13 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff5799be156e4f635c348c6051d165e1c59997827155133351a8c4d333d9841" +checksum = "7560adf816a1e8dad7c63d8845ef6e31e673e39eab310d225636779230cbedeb" dependencies = [ "proc-macro2", "quote", - "schemars", + "schemars 0.8.22", "semver", "serde", "serde_json", @@ -6869,9 +6934,9 @@ checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" [[package]] name = "unindent" @@ -6987,9 +7052,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" @@ -7152,7 +7217,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" dependencies = [ - "redox_syscall 0.5.12", + "redox_syscall 0.5.17", "wasite", "web-sys", ] @@ -7179,7 +7244,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -7190,9 +7255,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.61.1" +version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", "windows-core", @@ -7220,7 +7285,7 @@ dependencies = [ "windows-interface", "windows-link", "windows-result", - "windows-strings 0.4.2", + "windows-strings", ] [[package]] @@ -7272,17 +7337,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-registry" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" -dependencies = [ - "windows-result", - "windows-strings 0.3.1", - "windows-targets 0.53.3", -] - [[package]] name = "windows-result" version = "0.3.4" @@ -7292,15 +7346,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-strings" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-strings" version = "0.4.2" @@ -7477,9 +7522,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" dependencies = [ "memchr", ] @@ -7510,12 +7555,12 @@ dependencies = [ [[package]] name = "xattr" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" +checksum = "af3a19837351dc82ba89f8a125e22a3c475f05aba604acc023d62b2739ae2909" dependencies = [ "libc", - "rustix 1.0.7", + "rustix 1.0.8", ] [[package]] @@ -7571,18 +7616,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", @@ -7682,63 +7727,3 @@ dependencies = [ "cc", "pkg-config", ] - -[[patch.unused]] -name = "arrow" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-array" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-buffer" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-cast" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-data" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-flight" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-ipc" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-ord" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-schema" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-select" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "arrow-string" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" - -[[patch.unused]] -name = "parquet" -version = "55.1.0" -source = "git+https://github.com/alamb/arrow-rs.git?rev=99161728b473dc9d0ee1b8a1387ec94110cfc1cc#99161728b473dc9d0ee1b8a1387ec94110cfc1cc" diff --git a/Cargo.toml b/Cargo.toml index e4e8c16f239f..9c0d4a008b51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -228,48 +228,4 @@ unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(tarpaulin)", "cfg(tarpaulin_include)", ] } -unused_qualifications = "deny" - -# pin to pre-release arrow -#patch.crates-io] -#arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } -#parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "52d8d568f4ddd9069b743fc0eab17ffe102da35b" } - -# pin to https://github.com/apache/arrow-rs/pull/7650 -[patch.crates-io] -arrow = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-array = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-data = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-select = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-string = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } -parquet = { git = "https://github.com/alamb/arrow-rs.git", rev = "99161728b473dc9d0ee1b8a1387ec94110cfc1cc" } - - -#arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } -#arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } -#arrow-buffer = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-buffer" } -#arrow-cast = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-cast" } -#arrow-data = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-data" } -#arrow-ipc = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ipc" } -#arrow-schema = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-schema" } -#arrow-select = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-select" } -#arrow-string = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-string" } -#arrow-ord = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ord" } -#arrow-flight = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-flight" } -#parquet = { path= "/Users/andrewlamb/Software/arrow-rs/parquet" } +unused_qualifications = "deny" \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 8096c8cacf4c..b1aeb00814b9 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -280,7 +280,7 @@ physical_plan 06)┌─────────────┴─────────────┐ 07)│ DataSourceExec │ 08)│ -------------------- │ -09)│ bytes: 1072 │ +09)│ bytes: 1040 │ 10)│ format: memory │ 11)│ rows: 2 │ 12)└───────────────────────────┘ @@ -291,47 +291,40 @@ explain SELECT table1.string_col, table2.date_col FROM table1 JOIN table2 ON tab ---- physical_plan 01)┌───────────────────────────┐ -02)│ CoalesceBatchesExec │ +02)│ ProjectionExec │ 03)│ -------------------- │ -04)│ target_batch_size: │ -05)│ 8192 │ -06)└─────────────┬─────────────┘ -07)┌─────────────┴─────────────┐ -08)│ HashJoinExec │ -09)│ -------------------- │ -10)│ on: ├──────────────┐ -11)│ (int_col = int_col) │ │ -12)└─────────────┬─────────────┘ │ -13)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -14)│ CoalesceBatchesExec ││ CoalesceBatchesExec │ -15)│ -------------------- ││ -------------------- │ -16)│ target_batch_size: ││ target_batch_size: │ -17)│ 8192 ││ 8192 │ -18)└─────────────┬─────────────┘└─────────────┬─────────────┘ -19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -20)│ RepartitionExec ││ RepartitionExec │ -21)│ -------------------- ││ -------------------- │ -22)│ partition_count(in->out): ││ partition_count(in->out): │ -23)│ 4 -> 4 ││ 4 -> 4 │ -24)│ ││ │ -25)│ partitioning_scheme: ││ partitioning_scheme: │ -26)│ Hash([int_col@0], 4) ││ Hash([int_col@0], 4) │ -27)└─────────────┬─────────────┘└─────────────┬─────────────┘ -28)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -29)│ RepartitionExec ││ RepartitionExec │ -30)│ -------------------- ││ -------------------- │ -31)│ partition_count(in->out): ││ partition_count(in->out): │ -32)│ 1 -> 4 ││ 1 -> 4 │ -33)│ ││ │ -34)│ partitioning_scheme: ││ partitioning_scheme: │ -35)│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ -36)└─────────────┬─────────────┘└─────────────┬─────────────┘ -37)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -38)│ DataSourceExec ││ DataSourceExec │ -39)│ -------------------- ││ -------------------- │ -40)│ files: 1 ││ files: 1 │ -41)│ format: csv ││ format: parquet │ -42)└───────────────────────────┘└───────────────────────────┘ +04)│ date_col: date_col │ +05)│ │ +06)│ string_col: │ +07)│ string_col │ +08)└─────────────┬─────────────┘ +09)┌─────────────┴─────────────┐ +10)│ CoalesceBatchesExec │ +11)│ -------------------- │ +12)│ target_batch_size: │ +13)│ 8192 │ +14)└─────────────┬─────────────┘ +15)┌─────────────┴─────────────┐ +16)│ HashJoinExec │ +17)│ -------------------- │ +18)│ on: ├──────────────┐ +19)│ (int_col = int_col) │ │ +20)└─────────────┬─────────────┘ │ +21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +22)│ DataSourceExec ││ RepartitionExec │ +23)│ -------------------- ││ -------------------- │ +24)│ files: 1 ││ partition_count(in->out): │ +25)│ format: parquet ││ 1 -> 4 │ +26)│ ││ │ +27)│ ││ partitioning_scheme: │ +28)│ ││ RoundRobinBatch(4) │ +29)└───────────────────────────┘└─────────────┬─────────────┘ +30)-----------------------------┌─────────────┴─────────────┐ +31)-----------------------------│ DataSourceExec │ +32)-----------------------------│ -------------------- │ +33)-----------------------------│ files: 1 │ +34)-----------------------------│ format: csv │ +35)-----------------------------└───────────────────────────┘ # 3 Joins query TT @@ -365,48 +358,41 @@ physical_plan 19)│ (int_col = int_col) │ │ 20)└─────────────┬─────────────┘ │ 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -22)│ DataSourceExec ││ CoalesceBatchesExec │ +22)│ DataSourceExec ││ ProjectionExec │ 23)│ -------------------- ││ -------------------- │ -24)│ bytes: 536 ││ target_batch_size: │ -25)│ format: memory ││ 8192 │ +24)│ bytes: 520 ││ date_col: date_col │ +25)│ format: memory ││ int_col: int_col │ 26)│ rows: 1 ││ │ -27)└───────────────────────────┘└─────────────┬─────────────┘ -28)-----------------------------┌─────────────┴─────────────┐ -29)-----------------------------│ HashJoinExec │ -30)-----------------------------│ -------------------- │ -31)-----------------------------│ on: ├──────────────┐ -32)-----------------------------│ (int_col = int_col) │ │ -33)-----------------------------└─────────────┬─────────────┘ │ -34)-----------------------------┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -35)-----------------------------│ CoalesceBatchesExec ││ CoalesceBatchesExec │ -36)-----------------------------│ -------------------- ││ -------------------- │ -37)-----------------------------│ target_batch_size: ││ target_batch_size: │ -38)-----------------------------│ 8192 ││ 8192 │ -39)-----------------------------└─────────────┬─────────────┘└─────────────┬─────────────┘ -40)-----------------------------┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -41)-----------------------------│ RepartitionExec ││ RepartitionExec │ -42)-----------------------------│ -------------------- ││ -------------------- │ -43)-----------------------------│ partition_count(in->out): ││ partition_count(in->out): │ -44)-----------------------------│ 4 -> 4 ││ 4 -> 4 │ -45)-----------------------------│ ││ │ -46)-----------------------------│ partitioning_scheme: ││ partitioning_scheme: │ -47)-----------------------------│ Hash([int_col@0], 4) ││ Hash([int_col@0], 4) │ -48)-----------------------------└─────────────┬─────────────┘└─────────────┬─────────────┘ -49)-----------------------------┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -50)-----------------------------│ RepartitionExec ││ RepartitionExec │ -51)-----------------------------│ -------------------- ││ -------------------- │ -52)-----------------------------│ partition_count(in->out): ││ partition_count(in->out): │ -53)-----------------------------│ 1 -> 4 ││ 1 -> 4 │ -54)-----------------------------│ ││ │ -55)-----------------------------│ partitioning_scheme: ││ partitioning_scheme: │ -56)-----------------------------│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ -57)-----------------------------└─────────────┬─────────────┘└─────────────┬─────────────┘ -58)-----------------------------┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -59)-----------------------------│ DataSourceExec ││ DataSourceExec │ -60)-----------------------------│ -------------------- ││ -------------------- │ -61)-----------------------------│ files: 1 ││ files: 1 │ -62)-----------------------------│ format: csv ││ format: parquet │ -63)-----------------------------└───────────────────────────┘└───────────────────────────┘ +27)│ ││ string_col: │ +28)│ ││ string_col │ +29)└───────────────────────────┘└─────────────┬─────────────┘ +30)-----------------------------┌─────────────┴─────────────┐ +31)-----------------------------│ CoalesceBatchesExec │ +32)-----------------------------│ -------------------- │ +33)-----------------------------│ target_batch_size: │ +34)-----------------------------│ 8192 │ +35)-----------------------------└─────────────┬─────────────┘ +36)-----------------------------┌─────────────┴─────────────┐ +37)-----------------------------│ HashJoinExec │ +38)-----------------------------│ -------------------- │ +39)-----------------------------│ on: ├──────────────┐ +40)-----------------------------│ (int_col = int_col) │ │ +41)-----------------------------└─────────────┬─────────────┘ │ +42)-----------------------------┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +43)-----------------------------│ DataSourceExec ││ RepartitionExec │ +44)-----------------------------│ -------------------- ││ -------------------- │ +45)-----------------------------│ files: 1 ││ partition_count(in->out): │ +46)-----------------------------│ format: parquet ││ 1 -> 4 │ +47)-----------------------------│ ││ │ +48)-----------------------------│ ││ partitioning_scheme: │ +49)-----------------------------│ ││ RoundRobinBatch(4) │ +50)-----------------------------└───────────────────────────┘└─────────────┬─────────────┘ +51)----------------------------------------------------------┌─────────────┴─────────────┐ +52)----------------------------------------------------------│ DataSourceExec │ +53)----------------------------------------------------------│ -------------------- │ +54)----------------------------------------------------------│ files: 1 │ +55)----------------------------------------------------------│ format: csv │ +56)----------------------------------------------------------└───────────────────────────┘ # Long Filter (demonstrate what happens with wrapping) query TT @@ -669,7 +655,7 @@ physical_plan 13)┌─────────────┴─────────────┐ 14)│ DataSourceExec │ 15)│ -------------------- │ -16)│ bytes: 536 │ +16)│ bytes: 520 │ 17)│ format: memory │ 18)│ rows: 1 │ 19)└───────────────────────────┘ @@ -1029,20 +1015,11 @@ physical_plan 11)│ bigint_col │ 12)└─────────────┬─────────────┘ 13)┌─────────────┴─────────────┐ -14)│ RepartitionExec │ +14)│ DataSourceExec │ 15)│ -------------------- │ -16)│ partition_count(in->out): │ -17)│ 1 -> 4 │ -18)│ │ -19)│ partitioning_scheme: │ -20)│ RoundRobinBatch(4) │ -21)└─────────────┬─────────────┘ -22)┌─────────────┴─────────────┐ -23)│ DataSourceExec │ -24)│ -------------------- │ -25)│ files: 1 │ -26)│ format: parquet │ -27)└───────────────────────────┘ +16)│ files: 1 │ +17)│ format: parquet │ +18)└───────────────────────────┘ # Query with projection on memory @@ -1065,7 +1042,7 @@ physical_plan 13)┌─────────────┴─────────────┐ 14)│ DataSourceExec │ 15)│ -------------------- │ -16)│ bytes: 536 │ +16)│ bytes: 520 │ 17)│ format: memory │ 18)│ rows: 1 │ 19)└───────────────────────────┘ @@ -1186,51 +1163,46 @@ explain select * from table1 inner join table2 on table1.int_col = table2.int_co ---- physical_plan 01)┌───────────────────────────┐ -02)│ CoalesceBatchesExec │ +02)│ ProjectionExec │ 03)│ -------------------- │ -04)│ target_batch_size: │ -05)│ 8192 │ -06)└─────────────┬─────────────┘ -07)┌─────────────┴─────────────┐ -08)│ HashJoinExec │ -09)│ -------------------- │ -10)│ on: │ -11)│ (int_col = int_col), ├──────────────┐ -12)│ (string_col = │ │ -13)│ string_col) │ │ -14)└─────────────┬─────────────┘ │ -15)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -16)│ CoalesceBatchesExec ││ CoalesceBatchesExec │ -17)│ -------------------- ││ -------------------- │ -18)│ target_batch_size: ││ target_batch_size: │ -19)│ 8192 ││ 8192 │ -20)└─────────────┬─────────────┘└─────────────┬─────────────┘ -21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -22)│ RepartitionExec ││ RepartitionExec │ -23)│ -------------------- ││ -------------------- │ -24)│ partition_count(in->out): ││ partition_count(in->out): │ -25)│ 4 -> 4 ││ 4 -> 4 │ -26)│ ││ │ -27)│ partitioning_scheme: ││ partitioning_scheme: │ -28)│ Hash([int_col@0, ││ Hash([int_col@0, │ -29)│ string_col@1], ││ string_col@1], │ -30)│ 4) ││ 4) │ -31)└─────────────┬─────────────┘└─────────────┬─────────────┘ -32)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -33)│ RepartitionExec ││ RepartitionExec │ -34)│ -------------------- ││ -------------------- │ -35)│ partition_count(in->out): ││ partition_count(in->out): │ -36)│ 1 -> 4 ││ 1 -> 4 │ -37)│ ││ │ -38)│ partitioning_scheme: ││ partitioning_scheme: │ -39)│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ -40)└─────────────┬─────────────┘└─────────────┬─────────────┘ -41)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -42)│ DataSourceExec ││ DataSourceExec │ -43)│ -------------------- ││ -------------------- │ -44)│ files: 1 ││ files: 1 │ -45)│ format: csv ││ format: parquet │ -46)└───────────────────────────┘└───────────────────────────┘ +04)│ bigint_col: │ +05)│ bigint_col │ +06)│ │ +07)│ date_col: date_col │ +08)│ int_col: int_col │ +09)│ │ +10)│ string_col: │ +11)│ string_col │ +12)└─────────────┬─────────────┘ +13)┌─────────────┴─────────────┐ +14)│ CoalesceBatchesExec │ +15)│ -------------------- │ +16)│ target_batch_size: │ +17)│ 8192 │ +18)└─────────────┬─────────────┘ +19)┌─────────────┴─────────────┐ +20)│ HashJoinExec │ +21)│ -------------------- │ +22)│ on: │ +23)│ (int_col = int_col), ├──────────────┐ +24)│ (string_col = │ │ +25)│ string_col) │ │ +26)└─────────────┬─────────────┘ │ +27)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +28)│ DataSourceExec ││ RepartitionExec │ +29)│ -------------------- ││ -------------------- │ +30)│ files: 1 ││ partition_count(in->out): │ +31)│ format: parquet ││ 1 -> 4 │ +32)│ ││ │ +33)│ ││ partitioning_scheme: │ +34)│ ││ RoundRobinBatch(4) │ +35)└───────────────────────────┘└─────────────┬─────────────┘ +36)-----------------------------┌─────────────┴─────────────┐ +37)-----------------------------│ DataSourceExec │ +38)-----------------------------│ -------------------- │ +39)-----------------------------│ files: 1 │ +40)-----------------------------│ format: csv │ +41)-----------------------------└───────────────────────────┘ # Query with outer hash join. query TT @@ -1238,53 +1210,48 @@ explain select * from table1 left outer join table2 on table1.int_col = table2.i ---- physical_plan 01)┌───────────────────────────┐ -02)│ CoalesceBatchesExec │ +02)│ ProjectionExec │ 03)│ -------------------- │ -04)│ target_batch_size: │ -05)│ 8192 │ -06)└─────────────┬─────────────┘ -07)┌─────────────┴─────────────┐ -08)│ HashJoinExec │ -09)│ -------------------- │ -10)│ join_type: Left │ -11)│ │ -12)│ on: ├──────────────┐ -13)│ (int_col = int_col), │ │ -14)│ (string_col = │ │ -15)│ string_col) │ │ -16)└─────────────┬─────────────┘ │ -17)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -18)│ CoalesceBatchesExec ││ CoalesceBatchesExec │ -19)│ -------------------- ││ -------------------- │ -20)│ target_batch_size: ││ target_batch_size: │ -21)│ 8192 ││ 8192 │ -22)└─────────────┬─────────────┘└─────────────┬─────────────┘ -23)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -24)│ RepartitionExec ││ RepartitionExec │ -25)│ -------------------- ││ -------------------- │ -26)│ partition_count(in->out): ││ partition_count(in->out): │ -27)│ 4 -> 4 ││ 4 -> 4 │ -28)│ ││ │ -29)│ partitioning_scheme: ││ partitioning_scheme: │ -30)│ Hash([int_col@0, ││ Hash([int_col@0, │ -31)│ string_col@1], ││ string_col@1], │ -32)│ 4) ││ 4) │ -33)└─────────────┬─────────────┘└─────────────┬─────────────┘ -34)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -35)│ RepartitionExec ││ RepartitionExec │ -36)│ -------------------- ││ -------------------- │ -37)│ partition_count(in->out): ││ partition_count(in->out): │ -38)│ 1 -> 4 ││ 1 -> 4 │ -39)│ ││ │ -40)│ partitioning_scheme: ││ partitioning_scheme: │ -41)│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ -42)└─────────────┬─────────────┘└─────────────┬─────────────┘ -43)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -44)│ DataSourceExec ││ DataSourceExec │ -45)│ -------------------- ││ -------------------- │ -46)│ files: 1 ││ files: 1 │ -47)│ format: csv ││ format: parquet │ -48)└───────────────────────────┘└───────────────────────────┘ +04)│ bigint_col: │ +05)│ bigint_col │ +06)│ │ +07)│ date_col: date_col │ +08)│ int_col: int_col │ +09)│ │ +10)│ string_col: │ +11)│ string_col │ +12)└─────────────┬─────────────┘ +13)┌─────────────┴─────────────┐ +14)│ CoalesceBatchesExec │ +15)│ -------------------- │ +16)│ target_batch_size: │ +17)│ 8192 │ +18)└─────────────┬─────────────┘ +19)┌─────────────┴─────────────┐ +20)│ HashJoinExec │ +21)│ -------------------- │ +22)│ join_type: Right │ +23)│ │ +24)│ on: ├──────────────┐ +25)│ (int_col = int_col), │ │ +26)│ (string_col = │ │ +27)│ string_col) │ │ +28)└─────────────┬─────────────┘ │ +29)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +30)│ DataSourceExec ││ RepartitionExec │ +31)│ -------------------- ││ -------------------- │ +32)│ files: 1 ││ partition_count(in->out): │ +33)│ format: parquet ││ 1 -> 4 │ +34)│ ││ │ +35)│ ││ partitioning_scheme: │ +36)│ ││ RoundRobinBatch(4) │ +37)└───────────────────────────┘└─────────────┬─────────────┘ +38)-----------------------------┌─────────────┴─────────────┐ +39)-----------------------------│ DataSourceExec │ +40)-----------------------------│ -------------------- │ +41)-----------------------------│ files: 1 │ +42)-----------------------------│ format: csv │ +43)-----------------------------└───────────────────────────┘ # Query with nested loop join. query TT @@ -1303,35 +1270,8 @@ physical_plan 10)│ format: csv ││ │ 11)└───────────────────────────┘└─────────────┬─────────────┘ 12)-----------------------------┌─────────────┴─────────────┐ -13)-----------------------------│ AggregateExec │ -14)-----------------------------│ -------------------- │ -15)-----------------------------│ aggr: count(1) │ -16)-----------------------------│ mode: Final │ -17)-----------------------------└─────────────┬─────────────┘ -18)-----------------------------┌─────────────┴─────────────┐ -19)-----------------------------│ CoalescePartitionsExec │ -20)-----------------------------└─────────────┬─────────────┘ -21)-----------------------------┌─────────────┴─────────────┐ -22)-----------------------------│ AggregateExec │ -23)-----------------------------│ -------------------- │ -24)-----------------------------│ aggr: count(1) │ -25)-----------------------------│ mode: Partial │ -26)-----------------------------└─────────────┬─────────────┘ -27)-----------------------------┌─────────────┴─────────────┐ -28)-----------------------------│ RepartitionExec │ -29)-----------------------------│ -------------------- │ -30)-----------------------------│ partition_count(in->out): │ -31)-----------------------------│ 1 -> 4 │ -32)-----------------------------│ │ -33)-----------------------------│ partitioning_scheme: │ -34)-----------------------------│ RoundRobinBatch(4) │ -35)-----------------------------└─────────────┬─────────────┘ -36)-----------------------------┌─────────────┴─────────────┐ -37)-----------------------------│ DataSourceExec │ -38)-----------------------------│ -------------------- │ -39)-----------------------------│ files: 1 │ -40)-----------------------------│ format: parquet │ -41)-----------------------------└───────────────────────────┘ +13)-----------------------------│ PlaceholderRowExec │ +14)-----------------------------└───────────────────────────┘ # Query with cross join. query TT @@ -1342,20 +1282,11 @@ physical_plan 02)│ CrossJoinExec ├──────────────┐ 03)└─────────────┬─────────────┘ │ 04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -05)│ DataSourceExec ││ RepartitionExec │ +05)│ DataSourceExec ││ DataSourceExec │ 06)│ -------------------- ││ -------------------- │ -07)│ files: 1 ││ partition_count(in->out): │ -08)│ format: csv ││ 1 -> 4 │ -09)│ ││ │ -10)│ ││ partitioning_scheme: │ -11)│ ││ RoundRobinBatch(4) │ -12)└───────────────────────────┘└─────────────┬─────────────┘ -13)-----------------------------┌─────────────┴─────────────┐ -14)-----------------------------│ DataSourceExec │ -15)-----------------------------│ -------------------- │ -16)-----------------------------│ files: 1 │ -17)-----------------------------│ format: parquet │ -18)-----------------------------└───────────────────────────┘ +07)│ files: 1 ││ files: 1 │ +08)│ format: csv ││ format: parquet │ +09)└───────────────────────────┘└───────────────────────────┘ # Query with sort merge join. diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 3f5d9d92c2be..5d68ed35b2a9 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -2109,9 +2109,9 @@ RIGHT JOIN (select t2_id from join_t2 where join_t2.t2_id > 11) as join_t2 ORDER BY 1, 2 ---- 33 44 -NULL 22 33 55 44 55 +NULL 22 ##### # Configuration teardown diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 923c35e6261f..77850c6ae7c5 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -853,11 +853,7 @@ physical_plan 01)ProjectionExec: expr=[1 as foo] 02)--SortPreservingMergeExec: [part_key@0 ASC NULLS LAST], fetch=1 03)----SortExec: TopK(fetch=1), expr=[part_key@0 ASC NULLS LAST], preserve_partitioning=[true] -<<<<<<< HEAD -04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet:0..794]]}, projection=[part_key], file_type=parquet -======= 04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet]]}, projection=[part_key], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ] ->>>>>>> upstream/main query I with selection as ( diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index f6240d195de8..c536c8165c5a 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -138,11 +138,7 @@ physical_plan 01)SortPreservingMergeExec: [column1@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: column1@0 != 42 -<<<<<<< HEAD -04)------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..277], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:281..563], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:277..554]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] -======= 04)------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..263], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..268], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:268..537], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:263..526]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], file_type=parquet, predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] ->>>>>>> upstream/main # Cleanup statement ok From 0832ff4c9fdd0decb224b7a8bf3c11c6308a6418 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sat, 9 Aug 2025 10:38:16 +0800 Subject: [PATCH 17/22] fix --- Cargo.toml | 2 +- .../sqllogictest/test_files/explain_tree.slt | 247 +++++++++++++++++- 2 files changed, 239 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9c0d4a008b51..133536170878 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -228,4 +228,4 @@ unexpected_cfgs = { level = "warn", check-cfg = [ "cfg(tarpaulin)", "cfg(tarpaulin_include)", ] } -unused_qualifications = "deny" \ No newline at end of file +unused_qualifications = "deny" diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index b1aeb00814b9..8501a5077f8a 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1021,7 +1021,6 @@ physical_plan 17)│ format: parquet │ 18)└───────────────────────────┘ - # Query with projection on memory query TT explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table3; @@ -1288,7 +1287,6 @@ physical_plan 08)│ format: csv ││ format: parquet │ 09)└───────────────────────────┘└───────────────────────────┘ - # Query with sort merge join. statement ok set datafusion.optimizer.prefer_hash_join = false; @@ -1466,8 +1464,8 @@ drop table t2; # prepare table statement ok CREATE UNBOUNDED EXTERNAL TABLE data ( - "date" DATE, - "ticker" VARCHAR, + "date" DATE, + "ticker" VARCHAR, "time" TIMESTAMP, ) STORED AS CSV WITH ORDER ("date", "ticker", "time") @@ -1476,8 +1474,8 @@ LOCATION './a.parquet'; # query query TT -explain SELECT * FROM data -WHERE ticker = 'A' +explain SELECT * FROM data +WHERE ticker = 'A' ORDER BY "date", "time"; ---- physical_plan @@ -1639,7 +1637,7 @@ physical_plan # same thing but order by time, date query TT -explain SELECT * FROM data +explain SELECT * FROM data WHERE ticker = 'A' AND CAST(time AS DATE) = date ORDER BY "time", "date"; ---- @@ -1684,8 +1682,8 @@ physical_plan # query query TT -explain SELECT * FROM data -WHERE date = '2006-01-02' +explain SELECT * FROM data +WHERE date = '2006-01-02' ORDER BY "ticker", "time"; ---- physical_plan @@ -1983,3 +1981,234 @@ physical_plan 06)┌─────────────┴─────────────┐ 07)│ PlaceholderRowExec │ 08)└───────────────────────────┘ + + +# Test explain for large plans + +statement ok +CREATE TABLE t (k int) + +# By default, the plan of this large query is cropped +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ +06)│ │ +07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +08)│ │ │ +09)│ │ │ +10)└─────────────┬─────────────┘ │ +11)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ │ DataSourceExec │ +13)│ │ │ -------------------- │ +14)│ ├────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +15)│ │ │ │ format: memory │ +16)│ │ │ │ rows: 0 │ +17)└─────────────┬─────────────┘ │ └───────────────────────────┘ +18)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ │ DataSourceExec │ +20)│ │ │ -------------------- │ +21)│ ├───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +22)│ │ │ │ format: memory │ +23)│ │ │ │ rows: 0 │ +24)└─────────────┬─────────────┘ │ └───────────────────────────┘ +25)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ │ DataSourceExec │ +27)│ │ │ -------------------- │ +28)│ ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +29)│ │ │ │ format: memory │ +30)│ │ │ │ rows: 0 │ +31)└─────────────┬─────────────┘ │ └───────────────────────────┘ +32)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ │ DataSourceExec │ +34)│ │ │ -------------------- │ +35)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +36)│ │ │ │ format: memory │ +37)│ │ │ │ rows: 0 │ +38)└─────────────┬─────────────┘ │ └───────────────────────────┘ +39)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ │ DataSourceExec │ +41)│ │ │ -------------------- │ +42)│ ├────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +43)│ │ │ │ format: memory │ +44)│ │ │ │ rows: 0 │ +45)└─────────────┬─────────────┘ │ └───────────────────────────┘ +46)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ │ DataSourceExec │ +48)│ │ │ -------------------- │ +49)│ ├───────────────────────────────────────────┐ │ bytes: 0 │ +50)│ │ │ │ format: memory │ +51)│ │ │ │ rows: 0 │ +52)└─────────────┬─────────────┘ │ └───────────────────────────┘ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +# Setting the tree_maximum_render_size to 0 will allow the entire plan to be rendered +statement ok +SET datafusion.explain.tree_maximum_render_width = 0 + +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +03)└─────────────┬─────────────┘ │ +04)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ │ DataSourceExec │ +06)│ │ │ -------------------- │ +07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +08)│ │ │ │ format: memory │ +09)│ │ │ │ rows: 0 │ +10)└─────────────┬─────────────┘ │ └───────────────────────────┘ +11)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ │ DataSourceExec │ +13)│ │ │ -------------------- │ +14)│ ├────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +15)│ │ │ │ format: memory │ +16)│ │ │ │ rows: 0 │ +17)└─────────────┬─────────────┘ │ └───────────────────────────┘ +18)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ │ DataSourceExec │ +20)│ │ │ -------------------- │ +21)│ ├───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +22)│ │ │ │ format: memory │ +23)│ │ │ │ rows: 0 │ +24)└─────────────┬─────────────┘ │ └───────────────────────────┘ +25)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ │ DataSourceExec │ +27)│ │ │ -------------------- │ +28)│ ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +29)│ │ │ │ format: memory │ +30)│ │ │ │ rows: 0 │ +31)└─────────────┬─────────────┘ │ └───────────────────────────┘ +32)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ │ DataSourceExec │ +34)│ │ │ -------------------- │ +35)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +36)│ │ │ │ format: memory │ +37)│ │ │ │ rows: 0 │ +38)└─────────────┬─────────────┘ │ └───────────────────────────┘ +39)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ │ DataSourceExec │ +41)│ │ │ -------------------- │ +42)│ ├────────────────────────────────────────────────────────────────────────┐ │ bytes: 0 │ +43)│ │ │ │ format: memory │ +44)│ │ │ │ rows: 0 │ +45)└─────────────┬─────────────┘ │ └───────────────────────────┘ +46)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ │ DataSourceExec │ +48)│ │ │ -------------------- │ +49)│ ├───────────────────────────────────────────┐ │ bytes: 0 │ +50)│ │ │ │ format: memory │ +51)│ │ │ │ rows: 0 │ +52)└─────────────┬─────────────┘ │ └───────────────────────────┘ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +# Setting the tree_maximum_render_size to a smaller size +statement ok +SET datafusion.explain.tree_maximum_render_width = 60 + +query TT +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +---- +physical_plan +01)┌───────────────────────────┐ +02)│ CrossJoinExec ├────────────────────────────────────────────────────────── +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ +06)│ │ +07)│ ├────────────────────────────────────────────────────────── +08)│ │ +09)│ │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ CrossJoinExec │ +13)│ │ +14)│ ├────────────────────────────────────────────────────────── +15)│ │ +16)│ │ +17)└─────────────┬─────────────┘ +18)┌─────────────┴─────────────┐ +19)│ CrossJoinExec │ +20)│ │ +21)│ ├────────────────────────────────────────────────────────── +22)│ │ +23)│ │ +24)└─────────────┬─────────────┘ +25)┌─────────────┴─────────────┐ +26)│ CrossJoinExec │ +27)│ │ +28)│ ├────────────────────────────────────────────────────────── +29)│ │ +30)│ │ +31)└─────────────┬─────────────┘ +32)┌─────────────┴─────────────┐ +33)│ CrossJoinExec │ +34)│ │ +35)│ ├────────────────────────────────────────────────────────── +36)│ │ +37)│ │ +38)└─────────────┬─────────────┘ +39)┌─────────────┴─────────────┐ +40)│ CrossJoinExec │ +41)│ │ +42)│ ├────────────────────────────────────────────────────────── +43)│ │ +44)│ │ +45)└─────────────┬─────────────┘ +46)┌─────────────┴─────────────┐ +47)│ CrossJoinExec │ +48)│ │ +49)│ ├───────────────────────────────────────────┐ +50)│ │ │ +51)│ │ │ +52)└─────────────┬─────────────┘ │ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +61)│ DataSourceExec ││ DataSourceExec │ +62)│ -------------------- ││ -------------------- │ +63)│ bytes: 0 ││ bytes: 0 │ +64)│ format: memory ││ format: memory │ +65)│ rows: 0 ││ rows: 0 │ +66)└───────────────────────────┘└───────────────────────────┘ + +statement ok +DROP TABLE t \ No newline at end of file From 7e6ced0b67a00c240868bc156dcfc4c9973cfd82 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sat, 9 Aug 2025 10:55:05 +0800 Subject: [PATCH 18/22] fix test --- datafusion/core/src/datasource/listing/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index f4cfa10fd536..121ab46730b5 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -2095,7 +2095,7 @@ mod tests { ParquetFormat::default().get_ext(), FileCompressionType::UNCOMPRESSED, Some(config_map), - 4, + 2, ) .await?; Ok(()) From 3ccea48d7faa0cf6ea419215db0fc903d6a3fa76 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sat, 9 Aug 2025 11:32:31 +0800 Subject: [PATCH 19/22] fix --- datafusion/sqllogictest/test_files/explain_tree.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 8501a5077f8a..c9f6d69dbe1d 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -2211,4 +2211,4 @@ physical_plan 66)└───────────────────────────┘└───────────────────────────┘ statement ok -DROP TABLE t \ No newline at end of file +DROP TABLE t From bac019798ecd83a9d1239d8ff06a3be4825723fb Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sat, 9 Aug 2025 11:42:29 +0800 Subject: [PATCH 20/22] fix --- .../sqllogictest/test_files/explain_tree.slt | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index c9f6d69dbe1d..f57c50506893 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1464,8 +1464,8 @@ drop table t2; # prepare table statement ok CREATE UNBOUNDED EXTERNAL TABLE data ( - "date" DATE, - "ticker" VARCHAR, + "date" DATE, + "ticker" VARCHAR, "time" TIMESTAMP, ) STORED AS CSV WITH ORDER ("date", "ticker", "time") @@ -1474,8 +1474,8 @@ LOCATION './a.parquet'; # query query TT -explain SELECT * FROM data -WHERE ticker = 'A' +explain SELECT * FROM data +WHERE ticker = 'A' ORDER BY "date", "time"; ---- physical_plan @@ -1637,7 +1637,7 @@ physical_plan # same thing but order by time, date query TT -explain SELECT * FROM data +explain SELECT * FROM data WHERE ticker = 'A' AND CAST(time AS DATE) = date ORDER BY "time", "date"; ---- @@ -1682,8 +1682,8 @@ physical_plan # query query TT -explain SELECT * FROM data -WHERE date = '2006-01-02' +explain SELECT * FROM data +WHERE date = '2006-01-02' ORDER BY "ticker", "time"; ---- physical_plan @@ -1990,19 +1990,19 @@ CREATE TABLE t (k int) # By default, the plan of this large query is cropped query TT -EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 ---- physical_plan 01)┌───────────────────────────┐ 02)│ CrossJoinExec ├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── -03)└─────────────┬─────────────┘ -04)┌─────────────┴─────────────┐ -05)│ CrossJoinExec │ -06)│ │ -07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -08)│ │ │ -09)│ │ │ -10)└─────────────┬─────────────┘ │ +03)└─────────────┬─────────────┘ +04)┌─────────────┴─────────────┐ +05)│ CrossJoinExec │ +06)│ │ +07)│ ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +08)│ │ │ +09)│ │ │ +10)└─────────────┬─────────────┘ │ 11)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ 12)│ CrossJoinExec │ │ DataSourceExec │ 13)│ │ │ -------------------- │ @@ -2065,7 +2065,7 @@ statement ok SET datafusion.explain.tree_maximum_render_width = 0 query TT -EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 ---- physical_plan 01)┌───────────────────────────┐ @@ -2140,51 +2140,51 @@ statement ok SET datafusion.explain.tree_maximum_render_width = 60 query TT -EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 +EXPLAIN SELECT * FROM t t1, t t2, t t3, t t4, t t5, t t6, t t7, t t8, t t9, t t10 ---- physical_plan 01)┌───────────────────────────┐ -02)│ CrossJoinExec ├────────────────────────────────────────────────────────── +02)│ CrossJoinExec ├────────────────────────────────────────────────────────── 03)└─────────────┬─────────────┘ 04)┌─────────────┴─────────────┐ 05)│ CrossJoinExec │ 06)│ │ -07)│ ├────────────────────────────────────────────────────────── +07)│ ├────────────────────────────────────────────────────────── 08)│ │ 09)│ │ 10)└─────────────┬─────────────┘ 11)┌─────────────┴─────────────┐ 12)│ CrossJoinExec │ 13)│ │ -14)│ ├────────────────────────────────────────────────────────── +14)│ ├────────────────────────────────────────────────────────── 15)│ │ 16)│ │ 17)└─────────────┬─────────────┘ 18)┌─────────────┴─────────────┐ 19)│ CrossJoinExec │ 20)│ │ -21)│ ├────────────────────────────────────────────────────────── +21)│ ├────────────────────────────────────────────────────────── 22)│ │ 23)│ │ 24)└─────────────┬─────────────┘ 25)┌─────────────┴─────────────┐ 26)│ CrossJoinExec │ 27)│ │ -28)│ ├────────────────────────────────────────────────────────── +28)│ ├────────────────────────────────────────────────────────── 29)│ │ 30)│ │ 31)└─────────────┬─────────────┘ 32)┌─────────────┴─────────────┐ 33)│ CrossJoinExec │ 34)│ │ -35)│ ├────────────────────────────────────────────────────────── +35)│ ├────────────────────────────────────────────────────────── 36)│ │ 37)│ │ 38)└─────────────┬─────────────┘ 39)┌─────────────┴─────────────┐ 40)│ CrossJoinExec │ 41)│ │ -42)│ ├────────────────────────────────────────────────────────── +42)│ ├────────────────────────────────────────────────────────── 43)│ │ 44)│ │ 45)└─────────────┬─────────────┘ @@ -2195,13 +2195,13 @@ physical_plan 50)│ │ │ 51)│ │ │ 52)└─────────────┬─────────────┘ │ -53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ -54)│ CrossJoinExec │ │ DataSourceExec │ -55)│ │ │ -------------------- │ -56)│ ├──────────────┐ │ bytes: 0 │ -57)│ │ │ │ format: memory │ -58)│ │ │ │ rows: 0 │ -59)└─────────────┬─────────────┘ │ └───────────────────────────┘ +53)┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ +54)│ CrossJoinExec │ │ DataSourceExec │ +55)│ │ │ -------------------- │ +56)│ ├──────────────┐ │ bytes: 0 │ +57)│ │ │ │ format: memory │ +58)│ │ │ │ rows: 0 │ +59)└─────────────┬─────────────┘ │ └───────────────────────────┘ 60)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ 61)│ DataSourceExec ││ DataSourceExec │ 62)│ -------------------- ││ -------------------- │ From 8bbadaf7bfda68221d05c25cb2a4c527af2bebb7 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Sun, 10 Aug 2025 11:42:52 +0800 Subject: [PATCH 21/22] Address comments --- datafusion/physical-plan/src/coalesce/mod.rs | 47 ++++++++++++++----- .../physical-plan/src/coalesce_batches.rs | 15 ++++-- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index feaa2b567350..7c434fd1df12 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -35,6 +35,17 @@ pub struct LimitedBatchCoalescer { finished: bool, } +/// Status returned by [`LimitedBatchCoalescer::push_batch`] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PushBatchStatus { + /// The limit has **not** been reached, and more batches can be pushed + Continue, + /// The limit **has** been reached after processing this batch + /// The caller should call [`LimitedBatchCoalescer::finish`] + /// to flush any buffered rows and stop pushing more batches. + LimitReached, +} + impl LimitedBatchCoalescer { /// Create a new `BatchCoalescer` /// @@ -61,11 +72,21 @@ impl LimitedBatchCoalescer { self.inner.schema() } - /// Push next batch, and returns [`true`] indicating if the limit is hit + /// Pushes the next [`RecordBatch`] into the coalescer and returns its status. + /// + /// # Arguments + /// * `batch` - The [`RecordBatch`] to append. + /// + /// # Returns + /// * [`PushBatchStatus::Continue`] - More batches can still be pushed. + /// * [`PushBatchStatus::LimitReached`] - The row limit was reached after processing + /// this batch. The caller should call [`Self::finish`] before retrieving the + /// remaining buffered batches. /// - /// If the limit is reached, the caller must call [`Self::finish()`] to - /// complete the buffered results as a batch and finish the query. - pub fn push_batch(&mut self, batch: RecordBatch) -> Result { + /// # Errors + /// Returns an error if called after [`Self::finish`] or if the internal push + /// operation fails. + pub fn push_batch(&mut self, batch: RecordBatch) -> Result { if self.finished { return internal_err!( "LimitedBatchCoalescer: cannot push batch after finish" @@ -76,7 +97,7 @@ impl LimitedBatchCoalescer { if let Some(fetch) = self.fetch { // limit previously reached if self.total_rows >= fetch { - return Ok(true); + return Ok(PushBatchStatus::LimitReached); } // limit now reached @@ -88,14 +109,14 @@ impl LimitedBatchCoalescer { let batch_head = batch.slice(0, remaining_rows); self.total_rows += batch_head.num_rows(); self.inner.push_batch(batch_head)?; - return Ok(true); + return Ok(PushBatchStatus::LimitReached); } } + // Limit not reached, push the entire batch self.total_rows += batch.num_rows(); self.inner.push_batch(batch)?; - - Ok(false) // not at limit + Ok(PushBatchStatus::Continue) } /// Return true if there is no data buffered @@ -276,9 +297,13 @@ mod tests { let mut output_batches = vec![]; for batch in input_batches { - if coalescer.push_batch(batch).unwrap() { - // at limit, finish the coalescer - break; + match coalescer.push_batch(batch).unwrap() { + PushBatchStatus::Continue => { + // continue pushing batches + } + PushBatchStatus::LimitReached => { + break; + } } } coalescer.finish().unwrap(); diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 473ac5693273..20f4ca9d0b6a 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -34,7 +34,7 @@ use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalExpr; -use crate::coalesce::LimitedBatchCoalescer; +use crate::coalesce::{LimitedBatchCoalescer, PushBatchStatus}; use crate::execution_plan::CardinalityEffect; use crate::filter_pushdown::{ ChildPushdownResult, FilterDescription, FilterPushdownPhase, @@ -302,10 +302,15 @@ impl CoalesceBatchesStream { self.coalescer.finish()?; } Some(Ok(batch)) => { - if self.coalescer.push_batch(batch)? { - // limit was reached, so stop early - self.completed = true; - self.coalescer.finish()?; + match self.coalescer.push_batch(batch)? { + PushBatchStatus::Continue => { + // Keep pushing more batches + } + PushBatchStatus::LimitReached => { + // limit was reached, so stop early + self.completed = true; + self.coalescer.finish()?; + } } } // Error case From 940d49d2bbcc1d93452981c1919f8471987005d9 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Wed, 13 Aug 2025 16:56:12 +0800 Subject: [PATCH 22/22] address new comments --- datafusion/physical-plan/src/coalesce/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index e58d8e1767aa..5e2a9daaa6f3 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -33,6 +33,8 @@ pub struct LimitedBatchCoalescer { fetch: Option, /// Indicates if the coalescer is finished finished: bool, + /// The biggest size of the coalesced batch + biggest_coalesce_size: usize, } /// Status returned by [`LimitedBatchCoalescer::push_batch`] @@ -54,6 +56,7 @@ impl LimitedBatchCoalescer { /// - `target_batch_size` - the minimum number of rows for each /// output batch (until limit reached) /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows + /// - `biggest_coalesce_size` - the max size of the batch to coalesce, now it's fixed to `target_batch_size / 2` pub fn new( schema: SchemaRef, target_batch_size: usize, @@ -64,6 +67,7 @@ impl LimitedBatchCoalescer { total_rows: 0, fetch, finished: false, + biggest_coalesce_size: target_batch_size / 2, } } @@ -113,9 +117,16 @@ impl LimitedBatchCoalescer { } } + let num_rows = batch.num_rows(); // Limit not reached, push the entire batch self.total_rows += batch.num_rows(); self.inner.push_batch(batch)?; + + // If the number of rows in the current batch exceeds the coalesce size, + // we emit the buffered batch early to avoid coalescing for large batches. + if num_rows > self.biggest_coalesce_size { + self.inner.finish_buffered_batch()?; + } Ok(PushBatchStatus::Continue) }