diff --git a/Cargo.lock b/Cargo.lock index c6e3043f..64242eed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,8 +158,8 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum", - "strum_macros", + "strum 0.27.2", + "strum_macros 0.27.2", "thiserror", "uuid", "xz2", @@ -196,25 +196,60 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "arrow" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +dependencies = [ + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-csv 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-json 56.2.0", + "arrow-ord 56.2.0", + "arrow-row 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", +] + [[package]] name = "arrow" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 57.1.0", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-csv 57.1.0", + "arrow-data 57.1.0", + "arrow-ipc 57.1.0", + "arrow-json 57.1.0", + "arrow-ord 57.1.0", + "arrow-row 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", + "arrow-string 57.1.0", +] + +[[package]] +name = "arrow-arith" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "num", ] [[package]] @@ -223,14 +258,31 @@ version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "num-traits", ] +[[package]] +name = "arrow-array" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +dependencies = [ + "ahash", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.16.1", + "num", +] + [[package]] name = "arrow-array" version = "57.1.0" @@ -238,9 +290,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "chrono-tz", "half", @@ -250,6 +302,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-buffer" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-buffer" version = "57.1.0" @@ -262,18 +325,39 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-cast" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + [[package]] name = "arrow-cast" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-ord 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "atoi", "base64", "chrono", @@ -284,81 +368,173 @@ dependencies = [ "ryu", ] +[[package]] +name = "arrow-csv" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +dependencies = [ + "arrow-array 56.2.0", + "arrow-cast 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "csv", + "csv-core", + "regex", +] + [[package]] name = "arrow-csv" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-cast 57.1.0", + "arrow-schema 57.1.0", "chrono", "csv", "csv-core", "regex", ] +[[package]] +name = "arrow-data" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +dependencies = [ + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", + "half", + "num", +] + [[package]] name = "arrow-data" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 57.1.0", + "arrow-schema 57.1.0", "half", "num-integer", "num-traits", ] +[[package]] +name = "arrow-flight" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" +dependencies = [ + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-ord 56.2.0", + "arrow-row 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", + "base64", + "bytes", + "futures", + "once_cell", + "paste", + "prost 0.13.5", + "prost-types 0.13.5", + "tonic 0.13.1", +] + [[package]] name = "arrow-flight" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-ipc", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-ipc 57.1.0", + "arrow-schema 57.1.0", "base64", "bytes", "futures", - "prost", - "prost-types", - "tonic", + "prost 0.14.1", + "prost-types 0.14.1", + "tonic 0.14.2", "tonic-prost", ] +[[package]] +name = "arrow-ipc" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "flatbuffers", + "lz4_flex 0.11.5", + "zstd", +] + [[package]] name = "arrow-ipc" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "flatbuffers", - "lz4_flex", + "lz4_flex 0.12.0", "zstd", ] +[[package]] +name = "arrow-json" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "half", + "indexmap", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + [[package]] name = "arrow-json" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "half", "indexmap", @@ -372,17 +548,43 @@ dependencies = [ "simdutf8", ] +[[package]] +name = "arrow-ord" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", +] + [[package]] name = "arrow-ord" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", +] + +[[package]] +name = "arrow-row" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "half", ] [[package]] @@ -391,13 +593,23 @@ version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "half", ] +[[package]] +name = "arrow-schema" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "arrow-schema" version = "57.1.0" @@ -408,6 +620,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow-select" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +dependencies = [ + "ahash", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "num", +] + [[package]] name = "arrow-select" version = "57.1.0" @@ -415,24 +641,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "num-traits", ] +[[package]] +name = "arrow-string" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "memchr", + "num", + "regex", + "regex-syntax", +] + [[package]] name = "arrow-string" version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "memchr", "num-traits", "regex", @@ -951,10 +1194,13 @@ checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" dependencies = [ "axum-core 0.5.5", "bytes", + "form_urlencoded", "futures-util", "http 1.3.1", "http-body 1.0.1", "http-body-util", + "hyper 1.8.1", + "hyper-util", "itoa", "matchit 0.8.4", "memchr", @@ -962,10 +1208,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", "sync_wrapper", + "tokio", "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -1005,6 +1256,119 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", + "tracing", +] + +[[package]] +name = "ballista" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f2a3159e4138cf7de0ba8b7267aa97eb0c01d960920b1747dba71e6986ef421" +dependencies = [ + "async-trait", + "ballista-core", + "ballista-executor", + "ballista-scheduler", + "datafusion 50.3.0", + "log", + "tokio", + "url", +] + +[[package]] +name = "ballista-core" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca9c994b0f51d52a219fbaf4ca1769f807795a998e9d3c50a03ced67693860e" +dependencies = [ + "arrow-flight 56.2.0", + "async-trait", + "aws-config", + "aws-credential-types", + "chrono", + "clap 4.5.53", + "datafusion 50.3.0", + "datafusion-proto 50.3.0", + "datafusion-proto-common 50.3.0", + "futures", + "itertools", + "log", + "md-5", + "object_store", + "parking_lot", + "prost 0.13.5", + "prost-types 0.13.5", + "rand 0.9.2", + "rustc_version", + "serde", + "tokio", + "tokio-stream", + "tonic 0.13.1", + "tonic-build", + "url", + "uuid", +] + +[[package]] +name = "ballista-executor" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1bf32e64828067a003f201fc0068da4863867936a680af6ca3af7edda94d6e" +dependencies = [ + "arrow 56.2.0", + "arrow-flight 56.2.0", + "async-trait", + "ballista-core", + "clap 4.5.53", + "dashmap", + "datafusion 50.3.0", + "datafusion-proto 50.3.0", + "futures", + "libc", + "log", + "mimalloc", + "parking_lot", + "tempfile", + "tokio", + "tokio-stream", + "tokio-util", + "tonic 0.13.1", + "tracing", + "tracing-appender", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "ballista-scheduler" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54e618c0dbe63c48f69b417b26a2824a6be849bbf78fd73e7d98b567ce5907f3" +dependencies = [ + "arrow-flight 56.2.0", + "async-trait", + "axum 0.8.7", + "ballista-core", + "clap 4.5.53", + "dashmap", + "datafusion 50.3.0", + "datafusion-proto 50.3.0", + "futures", + "http 1.3.1", + "log", + "object_store", + "parking_lot", + "prost 0.13.5", + "prost-types 0.13.5", + "rand 0.9.2", + "serde", + "tokio", + "tokio-stream", + "tonic 0.13.1", + "tracing", + "tracing-appender", + "tracing-subscriber", + "uuid", ] [[package]] @@ -1321,11 +1685,12 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "unicode-segmentation", + "strum 0.26.3", + "strum_macros 0.26.4", "unicode-width 0.2.2", ] @@ -1536,55 +1901,110 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "datafusion" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +dependencies = [ + "arrow 56.2.0", + "arrow-ipc 56.2.0", + "arrow-schema 56.2.0", + "async-trait", + "bytes", + "bzip2 0.6.1", + "chrono", + "datafusion-catalog 50.3.0", + "datafusion-catalog-listing 50.3.0", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-datasource-csv 50.3.0", + "datafusion-datasource-json 50.3.0", + "datafusion-datasource-parquet 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-functions 50.3.0", + "datafusion-functions-aggregate 50.3.0", + "datafusion-functions-nested 50.3.0", + "datafusion-functions-table 50.3.0", + "datafusion-functions-window 50.3.0", + "datafusion-optimizer 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-adapter 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-optimizer 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", + "datafusion-sql 50.3.0", + "flate2", + "futures", + "itertools", + "log", + "object_store", + "parking_lot", + "parquet 56.2.0", + "rand 0.9.2", + "regex", + "sqlparser 0.58.0", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + [[package]] name = "datafusion" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ - "arrow", - "arrow-schema", + "arrow 57.1.0", + "arrow-schema 57.1.0", "async-trait", "bytes", "bzip2 0.6.1", "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", + "datafusion-catalog 51.0.0", + "datafusion-catalog-listing 51.0.0", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", "datafusion-datasource-arrow", "datafusion-datasource-avro", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-datasource-csv 51.0.0", + "datafusion-datasource-json 51.0.0", + "datafusion-datasource-parquet 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-functions 51.0.0", + "datafusion-functions-aggregate 51.0.0", + "datafusion-functions-nested 51.0.0", + "datafusion-functions-table 51.0.0", + "datafusion-functions-window 51.0.0", + "datafusion-optimizer 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-adapter 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-optimizer 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", + "datafusion-sql 51.0.0", "flate2", "futures", "itertools", "log", "object_store", "parking_lot", - "parquet", + "parquet 57.1.0", "rand 0.9.2", "regex", "rstest", - "sqlparser", + "sqlparser 0.59.0", "tempfile", "tokio", "url", @@ -1595,21 +2015,22 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "51.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" +checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" dependencies = [ - "arrow", + "arrow 56.2.0", "async-trait", "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", + "datafusion-sql 50.3.0", "futures", "itertools", "log", @@ -1619,57 +2040,130 @@ dependencies = [ ] [[package]] -name = "datafusion-catalog-listing" +name = "datafusion-catalog" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", + "dashmap", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "futures", "itertools", "log", "object_store", + "parking_lot", "tokio", ] [[package]] -name = "datafusion-cli" -version = "51.0.0" +name = "datafusion-catalog-listing" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" +checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" dependencies = [ - "arrow", + "arrow 56.2.0", "async-trait", - "aws-config", - "aws-credential-types", - "chrono", - "clap 4.5.53", - "datafusion", - "datafusion-common", - "dirs", - "env_logger", + "datafusion-catalog 50.3.0", + "datafusion-common 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", "futures", "log", - "mimalloc", "object_store", - "parking_lot", - "parquet", - "regex", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +dependencies = [ + "arrow 57.1.0", + "async-trait", + "datafusion-catalog 51.0.0", + "datafusion-common 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-adapter 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "futures", + "itertools", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-cli" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" +dependencies = [ + "arrow 57.1.0", + "async-trait", + "aws-config", + "aws-credential-types", + "chrono", + "clap 4.5.53", + "datafusion 51.0.0", + "datafusion-common 51.0.0", + "dirs", + "env_logger", + "futures", + "log", + "mimalloc", + "object_store", + "parking_lot", + "parquet 57.1.0", + "regex", "rustyline", "tokio", "url", ] +[[package]] +name = "datafusion-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +dependencies = [ + "ahash", + "arrow 56.2.0", + "arrow-ipc 56.2.0", + "base64", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", + "object_store", + "parquet 56.2.0", + "paste", + "recursive", + "sqlparser 0.58.0", + "tokio", + "web-time", +] + [[package]] name = "datafusion-common" version = "51.0.0" @@ -1678,8 +2172,8 @@ checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash", "apache-avro", - "arrow", - "arrow-ipc", + "arrow 57.1.0", + "arrow-ipc 57.1.0", "chrono", "half", "hashbrown 0.14.5", @@ -1688,14 +2182,25 @@ dependencies = [ "libc", "log", "object_store", - "parquet", + "parquet 57.1.0", "paste", "recursive", - "sqlparser", + "sqlparser 0.59.0", "tokio", "web-time", ] +[[package]] +name = "datafusion-common-runtime" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +dependencies = [ + "futures", + "log", + "tokio", +] + [[package]] name = "datafusion-common-runtime" version = "51.0.0" @@ -1707,27 +2212,64 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +dependencies = [ + "arrow 56.2.0", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.6.1", + "chrono", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-adapter 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", + "flate2", + "futures", + "glob", + "itertools", + "log", + "object_store", + "parquet 56.2.0", + "rand 0.9.2", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-datasource" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ - "arrow", + "arrow 57.1.0", "async-compression", "async-trait", "bytes", "bzip2 0.6.1", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-adapter 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "flate2", "futures", "glob", @@ -1748,18 +2290,18 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "arrow", - "arrow-ipc", + "arrow 57.1.0", + "arrow-ipc 57.1.0", "async-trait", "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "futures", "itertools", "object_store", @@ -1773,91 +2315,174 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" dependencies = [ "apache-avro", - "arrow", + "arrow 57.1.0", "async-trait", "bytes", - "datafusion-common", - "datafusion-datasource", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "futures", "num-traits", "object_store", ] +[[package]] +name = "datafusion-datasource-csv" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "bytes", + "datafusion-catalog 50.3.0", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", + "futures", + "object_store", + "regex", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "futures", "object_store", "regex", "tokio", ] +[[package]] +name = "datafusion-datasource-json" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "bytes", + "datafusion-catalog 50.3.0", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-session 50.3.0", + "futures", + "object_store", + "serde_json", + "tokio", +] + [[package]] name = "datafusion-datasource-json" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-session 51.0.0", "futures", "object_store", "tokio", ] +[[package]] +name = "datafusion-datasource-parquet" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "bytes", + "datafusion-catalog 50.3.0", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions-aggregate 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-adapter 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-optimizer 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-pruning 50.3.0", + "datafusion-session 50.3.0", + "futures", + "itertools", + "log", + "object_store", + "parking_lot", + "parquet 56.2.0", + "rand 0.9.2", + "tokio", +] + [[package]] name = "datafusion-datasource-parquet" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "datafusion-session", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-adapter 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-pruning 51.0.0", + "datafusion-session 51.0.0", "futures", "itertools", "log", "object_store", "parking_lot", - "parquet", + "parquet 57.1.0", "tokio", ] @@ -1865,15 +2490,15 @@ dependencies = [ name = "datafusion-distributed" version = "0.1.0" dependencies = [ - "arrow", - "arrow-flight", - "arrow-select", + "arrow 57.1.0", + "arrow-flight 57.1.0", + "arrow-select 57.1.0", "async-trait", "bytes", "chrono", "dashmap", - "datafusion", - "datafusion-proto", + "datafusion 51.0.0", + "datafusion-proto 51.0.0", "delegate", "futures", "http 1.3.1", @@ -1882,16 +2507,16 @@ dependencies = [ "itertools", "moka", "object_store", - "parquet", + "parquet 57.1.0", "pin-project", "pretty_assertions", - "prost", + "prost 0.14.1", "rand 0.8.5", "reqwest", "structopt", "tokio", "tokio-stream", - "tonic", + "tonic 0.14.2", "tower", "tpchgen", "tpchgen-arrow", @@ -1904,28 +2529,33 @@ dependencies = [ name = "datafusion-distributed-benchmarks" version = "0.1.0" dependencies = [ - "arrow-flight", + "arrow-flight 57.1.0", "async-trait", "aws-config", "aws-sdk-ec2", "axum 0.7.9", + "ballista", + "ballista-core", + "ballista-executor", + "ballista-scheduler", "chrono", + "clap 4.5.53", "dashmap", - "datafusion", + "datafusion 51.0.0", "datafusion-distributed", - "datafusion-proto", + "datafusion-proto 51.0.0", "env_logger", "futures", "log", "object_store", "openssl", - "parquet", - "prost", + "parquet 57.1.0", + "prost 0.14.1", "serde", "serde_json", "structopt", "tokio", - "tonic", + "tonic 0.14.2", "url", ] @@ -1933,10 +2563,10 @@ dependencies = [ name = "datafusion-distributed-cli" version = "0.1.0" dependencies = [ - "arrow-flight", + "arrow-flight 57.1.0", "async-trait", "clap 4.5.53", - "datafusion", + "datafusion 51.0.0", "datafusion-cli", "datafusion-distributed", "dirs", @@ -1944,59 +2574,120 @@ dependencies = [ "hyper-util", "tokio", "tokio-stream", - "tonic", + "tonic 0.14.2", "tower", "url", ] +[[package]] +name = "datafusion-doc" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" + [[package]] name = "datafusion-doc" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" +[[package]] +name = "datafusion-execution" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "dashmap", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + [[package]] name = "datafusion-execution" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", "futures", "log", "object_store", "parking_lot", - "parquet", + "parquet 57.1.0", "rand 0.9.2", "tempfile", "url", ] +[[package]] +name = "datafusion-expr" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "chrono", + "datafusion-common 50.3.0", + "datafusion-doc 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-functions-aggregate-common 50.3.0", + "datafusion-functions-window-common 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "indexmap", + "paste", + "recursive", + "serde_json", + "sqlparser 0.58.0", +] + [[package]] name = "datafusion-expr" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 51.0.0", + "datafusion-doc 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-functions-window-common 51.0.0", + "datafusion-physical-expr-common 51.0.0", "indexmap", "itertools", "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.59.0", +] + +[[package]] +name = "datafusion-expr-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +dependencies = [ + "arrow 56.2.0", + "datafusion-common 50.3.0", + "indexmap", + "itertools", + "paste", ] [[package]] @@ -2005,31 +2696,60 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ - "arrow", - "datafusion-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", "indexmap", "itertools", "paste", ] +[[package]] +name = "datafusion-functions" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +dependencies = [ + "arrow 56.2.0", + "arrow-buffer 56.2.0", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common 50.3.0", + "datafusion-doc 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-macros 50.3.0", + "hex", + "itertools", + "log", + "md-5", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-functions" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 57.1.0", + "arrow-buffer 57.1.0", "base64", "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 51.0.0", + "datafusion-doc 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-macros 51.0.0", "hex", "itertools", "log", @@ -2042,6 +2762,27 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-functions-aggregate" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +dependencies = [ + "ahash", + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-doc 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions-aggregate-common 50.3.0", + "datafusion-macros 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "half", + "log", + "paste", +] + [[package]] name = "datafusion-functions-aggregate" version = "51.0.0" @@ -2049,20 +2790,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-doc 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-macros 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", "half", "log", "paste", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +dependencies = [ + "ahash", + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-physical-expr-common 50.3.0", +] + [[package]] name = "datafusion-functions-aggregate-common" version = "51.0.0" @@ -2070,10 +2824,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-physical-expr-common 51.0.0", +] + +[[package]] +name = "datafusion-functions-nested" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +dependencies = [ + "arrow 56.2.0", + "arrow-ord 56.2.0", + "datafusion-common 50.3.0", + "datafusion-doc 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions 50.3.0", + "datafusion-functions-aggregate 50.3.0", + "datafusion-functions-aggregate-common 50.3.0", + "datafusion-macros 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "itertools", + "log", + "paste", ] [[package]] @@ -2082,65 +2858,120 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "arrow-ord 57.1.0", + "datafusion-common 51.0.0", + "datafusion-doc 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-functions 51.0.0", + "datafusion-functions-aggregate 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-macros 51.0.0", + "datafusion-physical-expr-common 51.0.0", "itertools", "log", "paste", ] +[[package]] +name = "datafusion-functions-table" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +dependencies = [ + "arrow 56.2.0", + "async-trait", + "datafusion-catalog 50.3.0", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-plan 50.3.0", + "parking_lot", + "paste", +] + [[package]] name = "datafusion-functions-table" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-catalog 51.0.0", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-plan 51.0.0", "parking_lot", "paste", ] +[[package]] +name = "datafusion-functions-window" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +dependencies = [ + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-doc 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions-window-common 50.3.0", + "datafusion-macros 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "log", + "paste", +] + [[package]] name = "datafusion-functions-window" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-doc 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions-window-common 51.0.0", + "datafusion-macros 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", "log", "paste", ] +[[package]] +name = "datafusion-functions-window-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +dependencies = [ + "datafusion-common 50.3.0", + "datafusion-physical-expr-common 50.3.0", +] + [[package]] name = "datafusion-functions-window-common" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 51.0.0", + "datafusion-physical-expr-common 51.0.0", +] + +[[package]] +name = "datafusion-macros" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +dependencies = [ + "datafusion-expr 50.3.0", + "quote", + "syn 2.0.110", ] [[package]] @@ -2149,23 +2980,43 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-doc", + "datafusion-doc 51.0.0", "quote", "syn 2.0.110", ] +[[package]] +name = "datafusion-optimizer" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +dependencies = [ + "arrow 56.2.0", + "chrono", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-physical-expr 50.3.0", + "indexmap", + "itertools", + "log", + "recursive", + "regex", + "regex-syntax", +] + [[package]] name = "datafusion-optimizer" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ - "arrow", + "arrow 57.1.0", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-physical-expr 51.0.0", "indexmap", "itertools", "log", @@ -2174,6 +3025,29 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "datafusion-physical-expr" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +dependencies = [ + "ahash", + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-functions-aggregate-common 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "parking_lot", + "paste", + "petgraph 0.8.3", +] + [[package]] name = "datafusion-physical-expr" version = "51.0.0" @@ -2181,19 +3055,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-physical-expr-common 51.0.0", "half", "hashbrown 0.14.5", "indexmap", "itertools", "parking_lot", "paste", - "petgraph", + "petgraph 0.8.3", +] + +[[package]] +name = "datafusion-physical-expr-adapter" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +dependencies = [ + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "itertools", ] [[package]] @@ -2202,12 +3091,26 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "itertools", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +dependencies = [ + "ahash", + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-expr-common 50.3.0", + "hashbrown 0.14.5", "itertools", ] @@ -2218,32 +3121,83 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-expr-common 51.0.0", "hashbrown 0.14.5", "itertools", ] +[[package]] +name = "datafusion-physical-optimizer" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +dependencies = [ + "arrow 56.2.0", + "datafusion-common 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-pruning 50.3.0", + "itertools", + "log", + "recursive", +] + [[package]] name = "datafusion-physical-optimizer" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-pruning 51.0.0", "itertools", "recursive", ] +[[package]] +name = "datafusion-physical-plan" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +dependencies = [ + "ahash", + "arrow 56.2.0", + "arrow-ord 56.2.0", + "arrow-schema 56.2.0", + "async-trait", + "chrono", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-functions-aggregate-common 50.3.0", + "datafusion-functions-window-common 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + [[package]] name = "datafusion-physical-plan" version = "51.0.0" @@ -2251,19 +3205,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 57.1.0", + "arrow-ord 57.1.0", + "arrow-schema 57.1.0", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 51.0.0", + "datafusion-common-runtime 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions-aggregate-common 51.0.0", + "datafusion-functions-window-common 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", "futures", "half", "hashbrown 0.14.5", @@ -2275,31 +3229,58 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-proto" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +dependencies = [ + "arrow 56.2.0", + "chrono", + "datafusion 50.3.0", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-proto-common 50.3.0", + "object_store", + "prost 0.13.5", +] + [[package]] name = "datafusion-proto" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" dependencies = [ - "arrow", + "arrow 57.1.0", "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-datasource", + "datafusion-catalog 51.0.0", + "datafusion-catalog-listing 51.0.0", + "datafusion-common 51.0.0", + "datafusion-datasource 51.0.0", "datafusion-datasource-arrow", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-table", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-proto-common", + "datafusion-datasource-csv 51.0.0", + "datafusion-datasource-json 51.0.0", + "datafusion-datasource-parquet 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-functions-table 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "datafusion-proto-common 51.0.0", "object_store", - "prost", + "prost 0.14.1", +] + +[[package]] +name = "datafusion-proto-common" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +dependencies = [ + "arrow 56.2.0", + "datafusion-common 50.3.0", + "prost 0.13.5", ] [[package]] @@ -2308,26 +3289,68 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" dependencies = [ - "arrow", - "datafusion-common", - "prost", + "arrow 57.1.0", + "datafusion-common 51.0.0", + "prost 0.14.1", +] + +[[package]] +name = "datafusion-pruning" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +dependencies = [ + "arrow 56.2.0", + "arrow-schema 56.2.0", + "datafusion-common 50.3.0", + "datafusion-datasource 50.3.0", + "datafusion-expr-common 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-expr-common 50.3.0", + "datafusion-physical-plan 50.3.0", + "itertools", + "log", ] [[package]] name = "datafusion-pruning" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +dependencies = [ + "arrow 57.1.0", + "datafusion-common 51.0.0", + "datafusion-datasource 51.0.0", + "datafusion-expr-common 51.0.0", + "datafusion-physical-expr 51.0.0", + "datafusion-physical-expr-common 51.0.0", + "datafusion-physical-plan 51.0.0", + "itertools", + "log", +] + +[[package]] +name = "datafusion-session" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", + "arrow 56.2.0", + "async-trait", + "dashmap", + "datafusion-common 50.3.0", + "datafusion-common-runtime 50.3.0", + "datafusion-execution 50.3.0", + "datafusion-expr 50.3.0", + "datafusion-physical-expr 50.3.0", + "datafusion-physical-plan 50.3.0", + "datafusion-sql 50.3.0", + "futures", "itertools", "log", + "object_store", + "parking_lot", + "tokio", ] [[package]] @@ -2337,29 +3360,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-common 51.0.0", + "datafusion-execution 51.0.0", + "datafusion-expr 51.0.0", + "datafusion-physical-plan 51.0.0", "parking_lot", ] +[[package]] +name = "datafusion-sql" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +dependencies = [ + "arrow 56.2.0", + "bigdecimal", + "datafusion-common 50.3.0", + "datafusion-expr 50.3.0", + "indexmap", + "log", + "recursive", + "regex", + "sqlparser 0.58.0", +] + [[package]] name = "datafusion-sql" version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ - "arrow", + "arrow 57.1.0", "bigdecimal", "chrono", - "datafusion-common", - "datafusion-expr", + "datafusion-common 51.0.0", + "datafusion-expr 51.0.0", "indexmap", "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.59.0", ] [[package]] @@ -3480,6 +4520,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + [[package]] name = "lz4_flex" version = "0.12.0" @@ -3510,6 +4559,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "matchit" version = "0.7.3" @@ -3591,6 +4649,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + [[package]] name = "native-tls" version = "0.2.14" @@ -3629,6 +4693,29 @@ dependencies = [ "libc", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -3664,6 +4751,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -3830,6 +4939,43 @@ dependencies = [ "windows-link", ] +[[package]] +name = "parquet" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +dependencies = [ + "ahash", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.16.1", + "lz4_flex 0.11.5", + "num", + "num-bigint", + "object_store", + "paste", + "ring", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + [[package]] name = "parquet" version = "57.1.0" @@ -3837,13 +4983,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-data 57.1.0", + "arrow-ipc 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "base64", "brotli", "bytes", @@ -3852,7 +4998,7 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "lz4_flex", + "lz4_flex 0.12.0", "num-bigint", "num-integer", "num-traits", @@ -3890,6 +5036,16 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "petgraph" version = "0.8.3" @@ -4065,6 +5221,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive 0.13.5", +] + [[package]] name = "prost" version = "0.14.1" @@ -4072,7 +5238,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.14.1", +] + +[[package]] +name = "prost-build" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +dependencies = [ + "heck 0.5.0", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph 0.7.1", + "prettyplease", + "prost 0.13.5", + "prost-types 0.13.5", + "regex", + "syn 2.0.110", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.110", ] [[package]] @@ -4088,13 +5287,22 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost 0.13.5", +] + [[package]] name = "prost-types" version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ - "prost", + "prost 0.14.1", ] [[package]] @@ -4755,6 +5963,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -4832,6 +6049,17 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "sqlparser" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + [[package]] name = "sqlparser" version = "0.59.0" @@ -4909,12 +6137,31 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.110", +] + [[package]] name = "strum_macros" version = "0.27.2" @@ -5044,6 +6291,15 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "thrift" version = "0.17.0" @@ -5062,6 +6318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", + "itoa", "num-conv", "powerfmt", "serde", @@ -5231,6 +6488,35 @@ dependencies = [ "winnow", ] +[[package]] +name = "tonic" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +dependencies = [ + "async-trait", + "axum 0.8.7", + "base64", + "bytes", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost 0.13.5", + "socket2 0.5.10", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic" version = "0.14.2" @@ -5260,6 +6546,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic-build" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types 0.13.5", + "quote", + "syn 2.0.110", +] + [[package]] name = "tonic-prost" version = "0.14.2" @@ -5267,8 +6567,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" dependencies = [ "bytes", - "prost", - "tonic", + "prost 0.14.1", + "tonic 0.14.2", ] [[package]] @@ -5330,15 +6630,15 @@ name = "tpchgen-arrow" version = "2.0.1" source = "git+https://github.com/clflushopt/tpchgen-rs?rev=e83365a5a9101906eb9f78c5607b83bc59849acf#e83365a5a9101906eb9f78c5607b83bc59849acf" dependencies = [ - "arrow", + "arrow 57.1.0", "tpchgen", ] [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -5346,11 +6646,23 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -5359,11 +6671,41 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -5456,6 +6798,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 3c7394a9..e9efe685 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -8,6 +8,10 @@ default-run = "dfbench" datafusion = { workspace = true } datafusion-proto = { workspace = true } datafusion-distributed = { path = "..", features = ["integration"] } +ballista = { version = "50" } +ballista-executor = { version = "50" } +ballista-scheduler = { version = "50" } +ballista-core = "50" tokio = { version = "1.46.1", features = ["full"] } parquet = { version = "57.1.0" } structopt = { version = "0.3.26" } @@ -28,6 +32,7 @@ object_store = { version = "0.12.4", features = ["aws"] } aws-config = "1" aws-sdk-ec2 = "1" openssl = { version = "0.10", features = ["vendored"] } +clap = "4.5" [[bin]] name = "dfbench" @@ -36,3 +41,15 @@ path = "src/main.rs" [[bin]] name = "worker" path = "cdk/bin/worker.rs" + +[[bin]] +name = "ballista-http" +path = "cdk/bin/ballista_http.rs" + +[[bin]] +name = "ballista-executor" +path = "cdk/bin/ballista_executor.rs" + +[[bin]] +name = "ballista-scheduler" +path = "cdk/bin/ballista_scheduler.rs" diff --git a/benchmarks/cdk/bin/ballista-bench.ts b/benchmarks/cdk/bin/ballista-bench.ts new file mode 100644 index 00000000..02b9d440 --- /dev/null +++ b/benchmarks/cdk/bin/ballista-bench.ts @@ -0,0 +1,97 @@ +import path from "path"; +import {Command} from "commander"; +import {z} from 'zod'; +import {BenchmarkRunner, ROOT, runBenchmark, TableSpec} from "./@bench-common"; + +// Remember to port-forward the ballista HTTP server with +// aws ssm start-session --target {host-id} --document-name AWS-StartPortForwardingSession --parameters "portNumber=9002,localPortNumber=9002" + +async function main() { + const program = new Command(); + + program + .option('--dataset ', 'Dataset to run queries on') + .option('-i, --iterations ', 'Number of iterations', '3') + .option('--query ', 'A specific query to run', undefined) + .parse(process.argv); + + const options = program.opts(); + + const dataset: string = options.dataset + const iterations = parseInt(options.iterations); + const queries = options.query ? [parseInt(options.query)] : []; + + const runner = new BallistaRunner({}); + + const datasetPath = path.join(ROOT, "benchmarks", "data", dataset); + const outputPath = path.join(datasetPath, "remote-results.json") + + await runBenchmark(runner, { + dataset, + iterations, + queries, + outputPath, + }); +} + +const QueryResponse = z.object({ + count: z.number(), + plan: z.string() +}) +type QueryResponse = z.infer + +class BallistaRunner implements BenchmarkRunner { + private url = 'http://localhost:9002'; + + constructor(private readonly options: {}) { + } + + async executeQuery(sql: string): Promise<{ rowCount: number }> { + let response + if (sql.includes("create view")) { + // This is query 15 + let [createView, query, dropView] = sql.split(";") + await this.query(createView); + response = await this.query(query) + await this.query(dropView); + } else { + response = await this.query(sql) + } + + return { rowCount: response.count }; + } + + private async query(sql: string): Promise { + const url = new URL(this.url); + url.searchParams.set('sql', sql); + + const response = await fetch(url.toString()); + + if (!response.ok) { + const msg = await response.text(); + throw new Error(`Query failed: ${response.status} ${msg}`); + } + + const unparsed = await response.json(); + return QueryResponse.parse(unparsed); + } + + async createTables(tables: TableSpec[]): Promise { + let stmt = ''; + for (const table of tables) { + // language=SQL format=false + stmt += ` + DROP TABLE IF EXISTS ${table.name}; + CREATE EXTERNAL TABLE IF NOT EXISTS ${table.name} STORED AS PARQUET LOCATION '${table.s3Path}'; + `; + } + await this.query(stmt); + } + +} + +main() + .catch(err => { + console.error(err) + process.exit(1) + }) diff --git a/benchmarks/cdk/bin/ballista_executor.rs b/benchmarks/cdk/bin/ballista_executor.rs new file mode 100644 index 00000000..87f660c0 --- /dev/null +++ b/benchmarks/cdk/bin/ballista_executor.rs @@ -0,0 +1,36 @@ +use ballista::datafusion::execution::runtime_env::RuntimeEnv; +use ballista::datafusion::prelude::SessionConfig; +use ballista_executor::config::Config; +use ballista_executor::executor_process::{ExecutorProcessConfig, start_executor_process}; +use clap::Parser; +use object_store::aws::AmazonS3Builder; +use std::env; +use std::sync::Arc; +use url::Url; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let opt = Config::parse(); + + let mut config: ExecutorProcessConfig = opt.try_into()?; + + let bucket = env::var("BUCKET").unwrap_or("datafusion-distributed-benchmarks".to_string()); + let s3_url = Url::parse(&format!("s3://{bucket}"))?; + + let s3 = Arc::new( + AmazonS3Builder::from_env() + .with_bucket_name(s3_url.host().unwrap().to_string()) + .build()?, + ); + let runtime_env = Arc::new(RuntimeEnv::default()); + runtime_env.register_object_store(&s3_url, s3); + + config.override_runtime_producer = Some(Arc::new( + move |_: &SessionConfig| -> ballista::datafusion::common::Result> { + Ok(runtime_env.clone()) + }, + )); + + start_executor_process(Arc::new(config)).await?; + Ok(()) +} diff --git a/benchmarks/cdk/bin/ballista_http.rs b/benchmarks/cdk/bin/ballista_http.rs new file mode 100644 index 00000000..948aa1e1 --- /dev/null +++ b/benchmarks/cdk/bin/ballista_http.rs @@ -0,0 +1,124 @@ +use axum::{Json, Router, extract::Query, http::StatusCode, routing::get}; +use ballista::datafusion::common::instant::Instant; +use ballista::datafusion::execution::SessionStateBuilder; +use ballista::datafusion::execution::runtime_env::RuntimeEnv; +use ballista::datafusion::physical_plan::displayable; +use ballista::datafusion::physical_plan::execute_stream; +use ballista::datafusion::prelude::SessionConfig; +use ballista::datafusion::prelude::SessionContext; +use ballista::prelude::*; +use futures::{StreamExt, TryFutureExt}; +use log::{error, info}; +use object_store::aws::AmazonS3Builder; +use serde::Serialize; +use std::collections::HashMap; +use std::error::Error; +use std::fmt::Display; +use std::sync::Arc; +use structopt::StructOpt; +use url::Url; + +#[derive(Serialize)] +struct QueryResult { + plan: String, + count: usize, +} + +#[derive(Debug, StructOpt, Clone)] +#[structopt(about = "worker spawn command")] +struct Cmd { + /// The bucket name. + #[structopt(long, default_value = "datafusion-distributed-benchmarks")] + bucket: String, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + env_logger::builder() + .filter_level(log::LevelFilter::Info) + .parse_default_env() + .init(); + + let cmd = Cmd::from_args(); + + const LISTENER_ADDR: &str = "0.0.0.0:9002"; + + info!("Starting HTTP listener on {LISTENER_ADDR}..."); + let listener = tokio::net::TcpListener::bind(LISTENER_ADDR).await?; + + // Register S3 object store + let s3_url = Url::parse(&format!("s3://{}", cmd.bucket))?; + + info!("Building shared SessionContext for the whole lifetime of the HTTP listener..."); + let s3 = Arc::new( + AmazonS3Builder::from_env() + .with_bucket_name(s3_url.host().unwrap().to_string()) + .build()?, + ); + let runtime_env = Arc::new(RuntimeEnv::default()); + runtime_env.register_object_store(&s3_url, s3); + + let config = SessionConfig::new_with_ballista().with_ballista_job_name("Benchmarks"); + + let state = SessionStateBuilder::new() + .with_config(config) + .with_default_features() + .with_runtime_env(Arc::clone(&runtime_env)) + .build(); + let ctx = SessionContext::remote_with_state("df://localhost:50050", state).await?; + + let http_server = axum::serve( + listener, + Router::new().route( + "/", + get(move |Query(params): Query>| { + let ctx = ctx.clone(); + + async move { + let sql = params.get("sql").ok_or(err("Missing 'sql' parameter"))?; + + let mut df_opt = None; + for sql in sql.split(";") { + if sql.trim().is_empty() { + continue; + } + let df = ctx.sql(sql).await.map_err(err)?; + df_opt = Some(df); + } + let Some(df) = df_opt else { + return Err(err("Empty 'sql' parameter")); + }; + + let start = Instant::now(); + + info!("Executing query..."); + let physical = df.create_physical_plan().await.map_err(err)?; + let mut stream = + execute_stream(physical.clone(), ctx.task_ctx()).map_err(err)?; + let mut count = 0; + while let Some(batch) = stream.next().await { + count += batch.map_err(err)?.num_rows(); + info!("Gathered {count} rows, query still in progress..") + } + let plan = displayable(physical.as_ref()).indent(true).to_string(); + let elapsed = start.elapsed(); + let ms = elapsed.as_secs_f64() * 1000.0; + info!("Returned {count} rows in {ms} ms"); + + Ok::<_, (StatusCode, String)>(Json(QueryResult { count, plan })) + } + .inspect_err(|(_, msg)| { + error!("Error executing query: {msg}"); + }) + }), + ), + ); + + info!("Started listener HTTP server in {LISTENER_ADDR}"); + http_server.await?; + Ok(()) +} + +fn err(s: impl Display) -> (StatusCode, String) { + (StatusCode::INTERNAL_SERVER_ERROR, s.to_string()) +} diff --git a/benchmarks/cdk/bin/ballista_scheduler.rs b/benchmarks/cdk/bin/ballista_scheduler.rs new file mode 100644 index 00000000..ba0b1f2d --- /dev/null +++ b/benchmarks/cdk/bin/ballista_scheduler.rs @@ -0,0 +1,62 @@ +use ballista::datafusion::execution::runtime_env::RuntimeEnv; +use ballista::datafusion::execution::{SessionState, SessionStateBuilder}; +use ballista::datafusion::prelude::SessionConfig; +use ballista_core::error::BallistaError; +use ballista_core::extension::SessionConfigExt; +use ballista_scheduler::cluster::BallistaCluster; +use ballista_scheduler::config::{Config, SchedulerConfig}; +use ballista_scheduler::scheduler_process::start_server; +use clap::Parser; +use object_store::aws::AmazonS3Builder; +use std::env; +use std::sync::Arc; +use url::Url; + +fn main() -> Result<(), Box> { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_io() + .enable_time() + .thread_stack_size(32 * 1024 * 1024) // 32MB + .build()?; + + runtime.block_on(inner()) +} + +async fn inner() -> Result<(), Box> { + let opt = Config::parse(); + + let addr = format!("{}:{}", opt.bind_host, opt.bind_port); + let addr = addr + .parse() + .map_err(|e: std::net::AddrParseError| BallistaError::Configuration(e.to_string()))?; + + let bucket = env::var("BUCKET").unwrap_or("datafusion-distributed-benchmarks".to_string()); + let s3_url = Url::parse(&format!("s3://{bucket}"))?; + + let s3 = Arc::new( + AmazonS3Builder::from_env() + .with_bucket_name(s3_url.host().unwrap().to_string()) + .build()?, + ); + let runtime_env = Arc::new(RuntimeEnv::default()); + runtime_env.register_object_store(&s3_url, s3); + + let config: SchedulerConfig = opt.try_into()?; + let config = config.with_override_config_producer(Arc::new(|| { + SessionConfig::new_with_ballista().with_information_schema(true) + })); + let config = config.with_override_session_builder(Arc::new( + move |cfg: SessionConfig| -> ballista::datafusion::common::Result { + Ok(SessionStateBuilder::new() + .with_config(cfg) + .with_runtime_env(runtime_env.clone()) + .with_default_features() + .build()) + }, + )); + + let cluster = BallistaCluster::new_from_config(&config).await?; + start_server(cluster, addr, Arc::new(config)).await?; + + Ok(()) +} diff --git a/benchmarks/cdk/bin/cdk.ts b/benchmarks/cdk/bin/cdk.ts index 2d1fd482..442fe003 100644 --- a/benchmarks/cdk/bin/cdk.ts +++ b/benchmarks/cdk/bin/cdk.ts @@ -1,12 +1,20 @@ #!/usr/bin/env node import * as cdk from 'aws-cdk-lib/core'; -import { CdkStack } from '../lib/cdk-stack'; +import {CdkStack} from '../lib/cdk-stack'; +import {DATAFUSION_DISTRIBUTED_ENGINE} from "../lib/datafusion-distributed"; +import {TRINO_ENGINE} from "../lib/trino"; +import {BALLISTA_ENGINE} from "../lib/ballista"; const app = new cdk.App(); const config = { - instanceType: 't3.xlarge', - instanceCount: 4, + instanceType: 't3.xlarge', + instanceCount: 4, + engines: [ + DATAFUSION_DISTRIBUTED_ENGINE, + TRINO_ENGINE, + BALLISTA_ENGINE + ] }; new CdkStack(app, 'DataFusionDistributedBenchmarks', { config }); diff --git a/benchmarks/cdk/lib/ballista.ts b/benchmarks/cdk/lib/ballista.ts new file mode 100644 index 00000000..5e138ee3 --- /dev/null +++ b/benchmarks/cdk/lib/ballista.ts @@ -0,0 +1,212 @@ +import { + AfterEc2MachinesContext, + BeforeEc2MachinesContext, + QueryEngine, + ROOT, + sendCommandsUnconditionally, + OnEc2MachinesContext +} from "./cdk-stack"; +import * as s3assets from "aws-cdk-lib/aws-s3-assets"; +import path from "path"; +import {execSync} from "child_process"; + +let ballistaServerBinary: s3assets.Asset +let ballistaSchedulerBinary: s3assets.Asset +let ballistaExecutorBinary: s3assets.Asset + +export const BALLISTA_ENGINE: QueryEngine = { + beforeEc2Machines(ctx: BeforeEc2MachinesContext): void { + console.log('Building Ballista server binary...'); + execSync('cargo zigbuild -p datafusion-distributed-benchmarks --release --bin ballista-http --target x86_64-unknown-linux-gnu', { + cwd: ROOT, + stdio: 'inherit', + }); + console.log('Ballista server binary built successfully'); + + console.log('Building Ballista scheduler...'); + execSync('cargo zigbuild -p datafusion-distributed-benchmarks --release --bin ballista-scheduler --target x86_64-unknown-linux-gnu', { + cwd: ROOT, + stdio: 'inherit', + }); + console.log('Ballista scheduler built successfully'); + + console.log('Building Ballista executor...'); + execSync('cargo zigbuild -p datafusion-distributed-benchmarks --release --bin ballista-executor --target x86_64-unknown-linux-gnu', { + cwd: ROOT, + stdio: 'inherit', + }); + console.log('Ballista scheduler built successfully'); + + ballistaServerBinary = new s3assets.Asset(ctx.scope, 'BallistaServerBinary', { + path: path.join(ROOT, 'target/x86_64-unknown-linux-gnu/release/ballista-http'), + }) + + ballistaSchedulerBinary = new s3assets.Asset(ctx.scope, 'BallistaSchedulerBinary', { + path: path.join(ROOT, 'target/x86_64-unknown-linux-gnu/release/ballista-scheduler'), + }) + + ballistaExecutorBinary = new s3assets.Asset(ctx.scope, 'BallistaExecutorBinary', { + path: path.join(ROOT, 'target/x86_64-unknown-linux-gnu/release/ballista-executor'), + }) + + ballistaServerBinary.grantRead(ctx.role) + ballistaSchedulerBinary.grantRead(ctx.role) + ballistaExecutorBinary.grantRead(ctx.role) + }, + onEc2Machine(ctx: OnEc2MachinesContext): void { + const isScheduler = ctx.instanceIdx === 0; + ctx.instanceUserData.addCommands( + // Download pre-compiled Ballista binaries from S3 + `aws s3 cp s3://${ballistaSchedulerBinary.s3BucketName}/${ballistaSchedulerBinary.s3ObjectKey} /usr/local/bin/ballista-scheduler`, + 'chmod +x /usr/local/bin/ballista-scheduler', + `aws s3 cp s3://${ballistaExecutorBinary.s3BucketName}/${ballistaExecutorBinary.s3ObjectKey} /usr/local/bin/ballista-executor`, + 'chmod +x /usr/local/bin/ballista-executor', + `aws s3 cp s3://${ballistaServerBinary.s3BucketName}/${ballistaServerBinary.s3ObjectKey} /usr/local/bin/ballista-http`, + 'chmod +x /usr/local/bin/ballista-http', + + // Create Ballista directories + 'mkdir -p /var/ballista/scheduler', + 'mkdir -p /var/ballista/executor', + 'mkdir -p /var/ballista/logs', + + // Create Ballista scheduler systemd service (coordinator only) + ...(isScheduler ? [ + `cat > /etc/systemd/system/ballista-scheduler.service << 'BALLISTA_EOF' +[Unit] +Description=Ballista Scheduler +After=network.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/ballista-scheduler \\ + --bind-host 0.0.0.0 \\ + --bind-port 50050 +Restart=on-failure +RestartSec=5 +User=root +WorkingDirectory=/var/ballista/scheduler +StandardOutput=append:/var/ballista/logs/scheduler.log +StandardError=append:/var/ballista/logs/scheduler.log + +[Install] +WantedBy=multi-user.target +BALLISTA_EOF` + ] : []), + + // Create Ballista executor systemd service (all nodes, will be reconfigured for workers) + `cat > /etc/systemd/system/ballista-executor.service << 'BALLISTA_EOF' +[Unit] +Description=Ballista Executor +After=network.target${isScheduler ? ' ballista-scheduler.service' : ''} +${isScheduler ? 'Requires=ballista-scheduler.service' : ''} + +[Service] +Type=simple +ExecStart=/usr/local/bin/ballista-executor \\ + --bind-host 0.0.0.0 \\ + --bind-port 50051 \\ + --work-dir /var/ballista/executor \\ + --scheduler-host localhost \\ + --scheduler-port 50050 +Restart=on-failure +RestartSec=5 +User=root +Environment="BUCKET=${ctx.bucketName}" +WorkingDirectory=/var/ballista/executor +StandardOutput=append:/var/ballista/logs/executor.log +StandardError=append:/var/ballista/logs/executor.log + +[Install] +WantedBy=multi-user.target +BALLISTA_EOF`, + + // Create HTTP server systemd service (coordinator only for now) + ...(isScheduler ? [ + `aws s3 cp s3://${ballistaServerBinary.s3BucketName}/${ballistaServerBinary.s3ObjectKey} /usr/local/bin/ballista-http`, + 'chmod +x /usr/local/bin/ballista-http', + `cat > /etc/systemd/system/ballista-http.service << 'BALLISTA_EOF' +[Unit] +Description=Ballista HTTP Server +After=network.target ballista-scheduler.service +Requires=ballista-scheduler.service + +[Service] +Type=simple +ExecStart=/usr/local/bin/ballista-http --bucket ${ctx.bucketName} +Restart=on-failure +RestartSec=5 +User=root +Environment="RUST_LOG=info" +WorkingDirectory=/var/ballista +StandardOutput=append:/var/ballista/logs/http.log +StandardError=append:/var/ballista/logs/http.log + +[Install] +WantedBy=multi-user.target +BALLISTA_EOF` + ] : []), + + // Reload systemd and enable services + 'systemctl daemon-reload', + + // Enable and start scheduler (coordinator only) + ...(isScheduler ? [ + 'systemctl enable ballista-scheduler', + 'systemctl start ballista-scheduler', + // Wait for scheduler to be ready + 'sleep 5' + ] : []), + + // Enable and start executor (all nodes) + 'systemctl enable ballista-executor', + 'systemctl start ballista-executor', + + // Enable and start HTTP server (coordinator only) + ...(isScheduler ? [ + 'systemctl enable ballista-http', + 'systemctl start ballista-http' + ] : []) + ) + + }, + afterEc2Machines(ctx: AfterEc2MachinesContext) { + const [scheduler, ...executors] = ctx.instances + + // Reconfigure executors on worker nodes to point to scheduler. The executor in the machine holding the scheduler + // communicates to it using localhost, so no need to update it with scheduler.instancePrivateIp. + sendCommandsUnconditionally( + ctx.scope, + "ConfigureBallistaExecutors", + [scheduler, ...executors], + [ + `cat > /etc/systemd/system/ballista-executor.service << 'BALLISTA_EOF' +[Unit] +Description=Ballista Executor +After=network.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/ballista-executor \\ + --bind-host 0.0.0.0 \\ + --bind-port 50051 \\ + --work-dir /var/ballista/executor \\ + --scheduler-host ${scheduler.instancePrivateIp} \\ + --scheduler-port 50050 +Restart=on-failure +RestartSec=5 +User=root +Environment="BUCKET=${ctx.bucketName}" +WorkingDirectory=/var/ballista/executor +StandardOutput=append:/var/ballista/logs/executor.log +StandardError=append:/var/ballista/logs/executor.log + +[Install] +WantedBy=multi-user.target +BALLISTA_EOF`, + 'systemctl daemon-reload', + 'systemctl restart ballista-executor', + ] + ) + } +} + diff --git a/benchmarks/cdk/lib/cdk-stack.ts b/benchmarks/cdk/lib/cdk-stack.ts index c821217f..febeb01d 100644 --- a/benchmarks/cdk/lib/cdk-stack.ts +++ b/benchmarks/cdk/lib/cdk-stack.ts @@ -1,165 +1,189 @@ -import { CfnOutput, RemovalPolicy, Stack, StackProps, Tags } from 'aws-cdk-lib'; +import {CfnOutput, RemovalPolicy, Stack, StackProps, Tags} from 'aws-cdk-lib'; import * as ec2 from 'aws-cdk-lib/aws-ec2'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3assets from 'aws-cdk-lib/aws-s3-assets'; -import * as cr from 'aws-cdk-lib/custom-resources'; -import { Construct } from 'constructs'; -import * as path from 'path'; -import { execSync } from 'child_process'; -import { trinoWorkerCommands, trinoUserDataCommands } from "./trino"; +import {Construct} from 'constructs'; +import {DATAFUSION_DISTRIBUTED_ENGINE} from "./datafusion-distributed"; +import {BALLISTA_ENGINE} from "./ballista"; +import {TRINO_ENGINE} from "./trino"; +import path from "path"; +import * as cr from "aws-cdk-lib/custom-resources"; + +const USER_DATA_CAUSES_REPLACEMENT = process.env['USER_DATA_CAUSES_REPLACEMENT'] == 'true' +if (USER_DATA_CAUSES_REPLACEMENT) { + console.warn("Instances will forcefully get replaced") +} -const ROOT = path.join(__dirname, '../../..') +const ENGINES = [ + DATAFUSION_DISTRIBUTED_ENGINE, + BALLISTA_ENGINE, + TRINO_ENGINE +] -interface CdkStackProps extends StackProps { - config: { - instanceType: string; - instanceCount: number; - }; +export const ROOT = path.join(__dirname, '../../..') + +export interface BeforeEc2MachinesContext { + scope: Construct + role: iam.Role } -export class CdkStack extends Stack { - constructor(scope: Construct, id: string, props: CdkStackProps) { - super(scope, id, props); - - const { config } = props; - - // Create VPC with public subnets only (for internet access without NAT gateway) - const vpc = new ec2.Vpc(this, 'BenchmarkVPC', { - maxAzs: 1, - natGateways: 0, - subnetConfiguration: [ - { - name: 'Public', - subnetType: ec2.SubnetType.PUBLIC, - cidrMask: 24, - }, - ], - }); +export interface OnEc2MachinesContext { + instanceIdx: number + instanceUserData: ec2.UserData + region: string + bucketName: string +} - // Create security group that allows instances to communicate - const securityGroup = new ec2.SecurityGroup(this, 'BenchmarkSG', { - vpc, - allowAllOutbound: true, - }); +export interface AfterEc2MachinesContext { + scope: Construct + instances: ec2.Instance[] + bucketName: string + region: string +} - // Allow all traffic between instances in the same security group - securityGroup.addIngressRule( - securityGroup, - ec2.Port.allTraffic(), - 'Allow all traffic between benchmark instances' - ); - - // Create S3 bucket - const bucket = new s3.Bucket(this, 'BenchmarkBucket', { - bucketName: "datafusion-distributed-benchmarks", - autoDeleteObjects: true, - removalPolicy: RemovalPolicy.DESTROY - }); +export interface QueryEngine { + /** Runs before instantiating any EC2 machine */ + beforeEc2Machines(ctx: BeforeEc2MachinesContext): void - // Build worker binary for Linux - console.log('Building worker binary...'); - execSync('cargo zigbuild -p datafusion-distributed-benchmarks --release --bin worker --target x86_64-unknown-linux-gnu', { - cwd: ROOT, - stdio: 'inherit', - }); - console.log('Worker binary built successfully'); + /** Runs for each instantiated EC2 machine */ + onEc2Machine(ctx: OnEc2MachinesContext): void - // Upload worker binary as an asset - const workerBinary = new s3assets.Asset(this, 'WorkerBinary', { - path: path.join(ROOT, 'target/x86_64-unknown-linux-gnu/release/worker'), - }); + /** Runs after all EC2 machines have been instantiated */ + afterEc2Machines(ctx: AfterEc2MachinesContext): void +} - // Create IAM role for EC2 instances - const role = new iam.Role(this, 'BenchmarkInstanceRole', { - assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), - ], - }); - // Grant permissions to describe EC2 instances (for peer discovery) - role.addToPolicy(new iam.PolicyStatement({ - actions: ['ec2:DescribeInstances'], - resources: ['*'], - })); - - // Grant Glue permissions for Trino Hive metastore - role.addToPolicy(new iam.PolicyStatement({ - actions: [ - 'glue:GetDatabase', - 'glue:GetDatabases', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:CreateTable', - 'glue:UpdateTable', - 'glue:DeleteTable', - 'glue:CreateDatabase', - 'glue:UpdateDatabase', - 'glue:DeleteDatabase', - ], - resources: ['*'], - })); - - // Grant read access to the bucket and worker binary - bucket.grantRead(role); - workerBinary.grantRead(role); - - // Create EC2 instances - const instances: ec2.Instance[] = []; - for (let i = 0; i < config.instanceCount; i++) { - const userData = ec2.UserData.forLinux(); - - userData.addCommands( - // Install Rust tooling. - 'yum install gcc', - "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh", - 'cargo install --locked tokio-console', - - // Create systemd service - `cat > /etc/systemd/system/worker.service << 'EOF' -[Unit] -Description=DataFusion Distributed Worker -After=network.target - -[Service] -Type=simple -ExecStart=/usr/local/bin/worker --bucket ${bucket.bucketName} -Restart=always -User=root - -[Install] -WantedBy=multi-user.target -EOF`, - - // Enable and start the service - 'systemctl daemon-reload', - 'systemctl enable worker', - 'systemctl start worker', - ...trinoUserDataCommands(i, this.region) - ); - - const instance = new ec2.Instance(this, `BenchmarkInstance${i}`, { - vpc, - vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, - instanceName: `instance-${i}`, - instanceType: new ec2.InstanceType(config.instanceType), - machineImage: ec2.MachineImage.latestAmazonLinux2023(), - securityGroup, - role, - userData - }); - - // Tag for peer discovery - Tags.of(instance).add('BenchmarkCluster', 'datafusion'); - instances.push(instance); - } +interface CdkStackProps extends StackProps { + config: { + instanceType: string; + instanceCount: number; + engines: QueryEngine[] + }; +} - // Output Session Manager commands for all instances - new CfnOutput(this, 'ConnectCommands', { - value: ` +export class CdkStack extends Stack { + constructor(scope: Construct, id: string, props: CdkStackProps) { + super(scope, id, props); + + const { config } = props; + + // Create VPC with public subnets only (for internet access without NAT gateway) + const vpc = new ec2.Vpc(this, 'BenchmarkVPC', { + maxAzs: 1, + natGateways: 0, + subnetConfiguration: [ + { + name: 'Public', + subnetType: ec2.SubnetType.PUBLIC, + cidrMask: 24, + }, + ], + }); + + // Create security group that allows instances to communicate + const securityGroup = new ec2.SecurityGroup(this, 'BenchmarkSG', { + vpc, + allowAllOutbound: true, + }); + + // Allow all traffic between instances in the same security group + securityGroup.addIngressRule( + securityGroup, + ec2.Port.allTraffic(), + 'Allow all traffic between benchmark instances' + ); + + // Create S3 bucket + const bucket = new s3.Bucket(this, 'BenchmarkBucket', { + bucketName: "datafusion-distributed-benchmarks", + autoDeleteObjects: true, + removalPolicy: RemovalPolicy.DESTROY + }); + + // Create IAM role for EC2 instances + const role = new iam.Role(this, 'BenchmarkInstanceRole', { + assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), + ], + }); + + // Grant permissions to describe EC2 instances (for peer discovery) + role.addToPolicy(new iam.PolicyStatement({ + actions: ['ec2:DescribeInstances'], + resources: ['*'], + })); + + // Grant Glue permissions for Trino Hive metastore + role.addToPolicy(new iam.PolicyStatement({ + actions: [ + 'glue:GetDatabase', + 'glue:GetDatabases', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:CreateTable', + 'glue:UpdateTable', + 'glue:DeleteTable', + 'glue:CreateDatabase', + 'glue:UpdateDatabase', + 'glue:DeleteDatabase', + ], + resources: ['*'], + })); + + // Grant read access to the bucket and worker binary + bucket.grantRead(role); + + for (const engine of ENGINES) { + engine.beforeEc2Machines({ + scope: this, + role + }) + } + + // Create EC2 instances + const instances: ec2.Instance[] = []; + for (let i = 0; i < config.instanceCount; i++) { + const userData = ec2.UserData.forLinux(); + + for (const engine of ENGINES) { + engine.onEc2Machine({ + bucketName: bucket.bucketName, + instanceIdx: i, + instanceUserData: userData, + region: this.region + }) + } + + const instance = new ec2.Instance(this, `BenchmarkInstance${i}`, { + vpc, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, + instanceName: `instance-${i}`, + instanceType: new ec2.InstanceType(config.instanceType), + machineImage: ec2.MachineImage.latestAmazonLinux2023(), + securityGroup, + role, + userData, + userDataCausesReplacement: USER_DATA_CAUSES_REPLACEMENT, + blockDevices: [{ + deviceName: '/dev/xvda', + volume: ec2.BlockDeviceVolume.ebs(200, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }], + }); + + // Tag for peer discovery + Tags.of(instance).add('BenchmarkCluster', 'datafusion'); + instances.push(instance); + } + + // Output Session Manager commands for all instances + new CfnOutput(this, 'ConnectCommands', { + value: ` # === select one instance to connect to === ${instances.map(_ => `export INSTANCE_ID=${_.instanceId}`).join("\n")} @@ -173,53 +197,51 @@ aws ssm start-session --target $INSTANCE_ID sudo journalctl -u worker.service -f -o cat `, - description: 'Session Manager commands to connect to instances', - }); - - // Downloads the latest version of the worker binary and restarts the systemd service. - // This is done instead of the userData.addS3Download() so that the instance does not need - // to restart every time a new worker binary is available. - sendCommandsUnconditionally(this, 'RestartWorkerService', instances, [ - `aws s3 cp s3://${workerBinary.s3BucketName}/${workerBinary.s3ObjectKey} /usr/local/bin/worker`, - 'chmod +x /usr/local/bin/worker', - 'systemctl restart worker', - ]) - - // Then start workers (they will discover the coordinator) - const [coordinator, ...workers] = instances - sendCommandsUnconditionally(this, 'TrinoCoordinatorCommands', [coordinator], ['systemctl start trino']) - sendCommandsUnconditionally(this, 'TrinoWorkerCommands', workers, trinoWorkerCommands(coordinator)) - } + description: 'Session Manager commands to connect to instances', + }); + + for (const engine of ENGINES) { + engine.afterEc2Machines({ + scope: this, + instances, + region: this.region, + bucketName: bucket.bucketName + }) + } + } } -function sendCommandsUnconditionally( - construct: Construct, - name: string, - instances: ec2.Instance[], - commands: string[] +export function sendCommandsUnconditionally( + construct: Construct, + name: string, + instances: ec2.Instance[], + commands: string[] ) { - const cmd = new cr.AwsCustomResource(construct, name, { - onUpdate: { - service: 'SSM', - action: 'sendCommand', - parameters: { - DocumentName: 'AWS-RunShellScript', - InstanceIds: instances.map(inst => inst.instanceId), - Parameters: { - commands + const cmd = new cr.AwsCustomResource(construct, name, { + onUpdate: { + service: 'SSM', + action: 'sendCommand', + parameters: { + DocumentName: 'AWS-RunShellScript', + InstanceIds: instances.map(inst => inst.instanceId), + Parameters: { + commands: [ + 'cloud-init status --wait', + ...commands + ] + }, + }, + physicalResourceId: cr.PhysicalResourceId.of(`${name}-${Date.now()}`), + ignoreErrorCodesMatching: '.*', }, - }, - physicalResourceId: cr.PhysicalResourceId.of(`${name}-${Date.now()}`), - ignoreErrorCodesMatching: '.*', - }, - policy: cr.AwsCustomResourcePolicy.fromStatements([ - new iam.PolicyStatement({ - actions: ['ssm:SendCommand'], - resources: ['*'], - }), - ]), - }); - - // Ensure instances are created before restarting - cmd.node.addDependency(...instances) + policy: cr.AwsCustomResourcePolicy.fromStatements([ + new iam.PolicyStatement({ + actions: ['ssm:SendCommand'], + resources: ['*'], + }), + ]), + }); + + // Ensure instances are created before restarting + cmd.node.addDependency(...instances) } diff --git a/benchmarks/cdk/lib/datafusion-distributed.ts b/benchmarks/cdk/lib/datafusion-distributed.ts new file mode 100644 index 00000000..9278c42b --- /dev/null +++ b/benchmarks/cdk/lib/datafusion-distributed.ts @@ -0,0 +1,73 @@ +import { + AfterEc2MachinesContext, + BeforeEc2MachinesContext, + OnEc2MachinesContext, + QueryEngine, + ROOT, + sendCommandsUnconditionally +} from "./cdk-stack"; +import {execSync} from "child_process"; +import * as s3assets from "aws-cdk-lib/aws-s3-assets"; +import path from "path"; + +let workerBinary: s3assets.Asset + +export const DATAFUSION_DISTRIBUTED_ENGINE: QueryEngine = { + beforeEc2Machines(ctx: BeforeEc2MachinesContext): void { + console.log('Building worker binary...'); + execSync('cargo zigbuild -p datafusion-distributed-benchmarks --release --bin worker --target x86_64-unknown-linux-gnu', { + cwd: ROOT, + stdio: 'inherit', + }); + console.log('Worker binary built successfully'); + + + // Upload worker binary as an asset + workerBinary = new s3assets.Asset(ctx.scope, 'WorkerBinary', { + path: path.join(ROOT, 'target/x86_64-unknown-linux-gnu/release/worker'), + }); + + workerBinary.grantRead(ctx.role) + }, + onEc2Machine(ctx: OnEc2MachinesContext): void { + ctx.instanceUserData.addCommands( + 'yum install -y gcc openssl-devel', + 'curl --proto \'=https\' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y', + + `aws s3 cp s3://${workerBinary.s3BucketName}/${workerBinary.s3ObjectKey} /usr/local/bin/worker`, + 'chmod +x /usr/local/bin/worker', + // Create systemd service + `cat > /etc/systemd/system/worker.service << 'EOF' +[Unit] +Description=DataFusion Distributed Worker +After=network.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/worker --bucket ${ctx.bucketName} +Restart=always +User=root + +[Install] +WantedBy=multi-user.target +EOF`, + + // Enable and start the service + 'systemctl daemon-reload', + 'systemctl enable worker', + 'systemctl start worker', + ) + }, + afterEc2Machines(ctx: AfterEc2MachinesContext): void { + // Downloads the latest version of the worker binary and restarts the systemd service. + // This is done instead of the userData.addS3Download() so that the instance does not need + // to restart every time a new worker binary is available. + sendCommandsUnconditionally(ctx.scope, 'RestartWorkerService', + ctx.instances, + [ + `aws s3 cp s3://${workerBinary.s3BucketName}/${workerBinary.s3ObjectKey} /usr/local/bin/worker`, + 'chmod +x /usr/local/bin/worker', + 'systemctl restart worker', + ]) + } +} \ No newline at end of file diff --git a/benchmarks/cdk/lib/trino.ts b/benchmarks/cdk/lib/trino.ts index ee97a45e..9614beb9 100644 --- a/benchmarks/cdk/lib/trino.ts +++ b/benchmarks/cdk/lib/trino.ts @@ -1,34 +1,42 @@ -import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import { + AfterEc2MachinesContext, + QueryEngine, + OnEc2MachinesContext, + sendCommandsUnconditionally, +} from "./cdk-stack"; const TRINO_VERSION = 476 -export function trinoUserDataCommands(instanceIndex: number, region: string): string[] { - const isCoordinator = instanceIndex === 0; - - return [ - // Install Java 24 for Trino (Trino 478 requires Java 24+) - 'yum install -y java-24-amazon-corretto-headless python', - - // Download and install Trino 478 (latest version) - 'cd /opt', - `curl -L -o trino-server.tar.gz https://repo1.maven.org/maven2/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz`, - 'tar -xzf trino-server.tar.gz', - `mv trino-server-${TRINO_VERSION} trino-server`, - 'rm trino-server.tar.gz', - - // Create Trino directories - 'mkdir -p /var/trino/data', - 'mkdir -p /opt/trino-server/etc/catalog', - - // Configure Trino node properties - `cat > /opt/trino-server/etc/node.properties << 'TRINO_EOF' +export const TRINO_ENGINE: QueryEngine = { + beforeEc2Machines(): void { + // nothing to compile here + }, + onEc2Machine(ctx: OnEc2MachinesContext): void { + const isCoordinator = ctx.instanceIdx === 0; + ctx.instanceUserData.addCommands( + // Install Java 24 for Trino (Trino 478 requires Java 24+) + 'yum install -y java-24-amazon-corretto-headless python', + + // Download and install Trino 478 (latest version) + 'cd /opt', + `curl -L -o trino-server.tar.gz https://repo1.maven.org/maven2/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz`, + 'tar -xzf trino-server.tar.gz', + `mv trino-server-${TRINO_VERSION} trino-server`, + 'rm trino-server.tar.gz', + + // Create Trino directories + 'mkdir -p /var/trino/data', + 'mkdir -p /opt/trino-server/etc/catalog', + + // Configure Trino node properties + `cat > /opt/trino-server/etc/node.properties << 'TRINO_EOF' node.environment=benchmark -node.id=instance-${instanceIndex} +node.id=instance-${ctx.instanceIdx} node.data-dir=/var/trino/data TRINO_EOF`, - // Configure Trino JVM settings (minimal - using conservative 8GB heap) - `cat > /opt/trino-server/etc/jvm.config << 'TRINO_EOF' + // Configure Trino JVM settings (minimal - using conservative 8GB heap) + `cat > /opt/trino-server/etc/jvm.config << 'TRINO_EOF' -server -Xmx8G -XX:+UseG1GC @@ -39,40 +47,40 @@ TRINO_EOF`, -Djdk.attach.allowAttachSelf=true TRINO_EOF`, - // Configure Trino config.properties (workers will be reconfigured during lazy startup) - isCoordinator - ? `cat > /opt/trino-server/etc/config.properties << 'TRINO_EOF' + // Configure Trino config.properties (workers will be reconfigured during lazy startup) + isCoordinator + ? `cat > /opt/trino-server/etc/config.properties << 'TRINO_EOF' coordinator=true node-scheduler.include-coordinator=true http-server.http.port=8080 discovery.uri=http://localhost:8080 TRINO_EOF` - : `cat > /opt/trino-server/etc/config.properties << 'TRINO_EOF' + : `cat > /opt/trino-server/etc/config.properties << 'TRINO_EOF' coordinator=false http-server.http.port=8080 discovery.uri=http://localhost:8080 TRINO_EOF`, - // Configure Hive catalog with AWS Glue metastore - `cat > /opt/trino-server/etc/catalog/hive.properties << 'TRINO_EOF' + // Configure Hive catalog with AWS Glue metastore + `cat > /opt/trino-server/etc/catalog/hive.properties << 'TRINO_EOF' connector.name=hive hive.metastore=glue -hive.metastore.glue.region=${region} +hive.metastore.glue.region=${ctx.region} fs.native-s3.enabled=true -s3.region=${region} +s3.region=${ctx.region} TRINO_EOF`, - // Configure TPCH catalog for reference - `cat > /opt/trino-server/etc/catalog/tpch.properties << 'TRINO_EOF' + // Configure TPCH catalog for reference + `cat > /opt/trino-server/etc/catalog/tpch.properties << 'TRINO_EOF' connector.name=tpch TRINO_EOF`, - // Download Trino CLI - 'curl -L -o /usr/local/bin/trino https://repo1.maven.org/maven2/io/trino/trino-cli/478/trino-cli-478-executable.jar', - 'chmod +x /usr/local/bin/trino', + // Download Trino CLI + 'curl -L -o /usr/local/bin/trino https://repo1.maven.org/maven2/io/trino/trino-cli/478/trino-cli-478-executable.jar', + 'chmod +x /usr/local/bin/trino', - // Create Trino systemd service - `cat > /etc/systemd/system/trino.service << 'TRINO_EOF' + // Create Trino systemd service + `cat > /etc/systemd/system/trino.service << 'TRINO_EOF' [Unit] Description=Trino Server After=network.target @@ -89,20 +97,29 @@ WorkingDirectory=/opt/trino-server WantedBy=multi-user.target TRINO_EOF`, - // Enable Trino (but don't start yet - will be started lazily after all instances are up) - 'systemctl daemon-reload', - 'systemctl enable trino', - 'systemctl start trino' - ]; -} - -export function trinoWorkerCommands(coordinator: ec2.Instance) { - return [ - `cat > /opt/trino-server/etc/config.properties << TRINO_EOF + // Enable Trino (but don't start yet - will be started lazily after all instances are up) + 'systemctl daemon-reload', + 'systemctl enable trino', + 'systemctl start trino' + ) + }, + afterEc2Machines(ctx: AfterEc2MachinesContext): void { + const [coordinator, ...workers] = ctx.instances + // Then start workers (they will discover the coordinator) + sendCommandsUnconditionally(ctx.scope, 'TrinoCoordinatorCommands', + [coordinator], + ['systemctl start trino'] + ) + sendCommandsUnconditionally(ctx.scope, 'TrinoWorkerCommands', + workers, + [ + `cat > /opt/trino-server/etc/config.properties << TRINO_EOF coordinator=false http-server.http.port=8080 discovery.uri=http://${coordinator.instancePrivateIp}:8080 TRINO_EOF`, - 'systemctl restart trino', - ] + 'systemctl restart trino', + ] + ) + } } \ No newline at end of file diff --git a/benchmarks/cdk/package.json b/benchmarks/cdk/package.json index 3c006f89..67b6ee81 100644 --- a/benchmarks/cdk/package.json +++ b/benchmarks/cdk/package.json @@ -11,6 +11,7 @@ "cdk": "cdk", "sync-bucket": "aws s3 sync ../data s3://datafusion-distributed-benchmarks/", "datafusion-bench": "npx ts-node bin/datafusion-bench.ts", + "ballista-bench": "npx ts-node bin/ballista-bench.ts", "trino-bench": "npx ts-node bin/trino-bench.ts" }, "devDependencies": {