From b6a0ff201b9394e18f33cac7718cb118698f8e6a Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Thu, 16 Oct 2025 19:36:04 +0200 Subject: [PATCH 1/9] in work --- rust/cubestore/Cargo.lock | 41 -- rust/cubestore/cubestore/Cargo.toml | 12 +- rust/cubestore/cubestore/src/config/mod.rs | 52 +- rust/cubestore/cubestore/src/import/mod.rs | 24 +- rust/cubestore/cubestore/src/lib.rs | 2 +- .../inline_aggregate_stream.rs | 611 ++++++++++++++++++ .../src/queryplanner/inline_aggregate/mod.rs | 281 ++++++++ .../inline_aggregate/sorted_group_values.rs | 304 +++++++++ .../cubestore/src/queryplanner/mod.rs | 3 +- .../src/queryplanner/pretty_printers.rs | 4 +- .../src/queryplanner/query_executor.rs | 3 +- rust/cubestore/rust-toolchain.toml | 2 +- 12 files changed, 1247 insertions(+), 92 deletions(-) create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock index 7cd0e2c9ddcec..6a64ae7e5efe8 100644 --- a/rust/cubestore/Cargo.lock +++ b/rust/cubestore/Cargo.lock @@ -219,7 +219,6 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-arith", "arrow-array", @@ -239,7 +238,6 @@ dependencies = [ [[package]] name = "arrow-arith" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -252,7 +250,6 @@ dependencies = [ [[package]] name = "arrow-array" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "ahash 0.8.11", "arrow-buffer", @@ -268,7 +265,6 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "bytes 1.10.1", "half 2.4.1", @@ -278,7 +274,6 @@ dependencies = [ [[package]] name = "arrow-cast" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -298,7 +293,6 @@ dependencies = [ [[package]] name = "arrow-csv" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-cast", @@ -313,7 +307,6 @@ dependencies = [ [[package]] name = "arrow-data" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-buffer", "arrow-schema", @@ -324,7 +317,6 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -337,7 +329,6 @@ dependencies = [ [[package]] name = "arrow-json" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -356,7 +347,6 @@ dependencies = [ [[package]] name = "arrow-ord" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,7 +358,6 @@ dependencies = [ [[package]] name = "arrow-row" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -380,7 +369,6 @@ dependencies = [ [[package]] name = "arrow-schema" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "serde", ] @@ -388,7 +376,6 @@ dependencies = [ [[package]] name = "arrow-select" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "ahash 0.8.11", "arrow-array", @@ -401,7 +388,6 @@ dependencies = [ [[package]] name = "arrow-string" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -1723,7 +1709,6 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "datafusion" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "arrow-ipc", @@ -1776,7 +1761,6 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "async-trait", @@ -1795,7 +1779,6 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "async-trait", @@ -1816,7 +1799,6 @@ dependencies = [ [[package]] name = "datafusion-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -1839,7 +1821,6 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "log", "tokio", @@ -1848,7 +1829,6 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "async-compression 0.4.17", @@ -1881,12 +1861,10 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" [[package]] name = "datafusion-execution" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "dashmap", @@ -1906,7 +1884,6 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "chrono", @@ -1926,7 +1903,6 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "datafusion-common", @@ -1938,7 +1914,6 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "arrow-buffer", @@ -1966,7 +1941,6 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -1986,7 +1960,6 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -1998,7 +1971,6 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "arrow-ord", @@ -2018,7 +1990,6 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "async-trait", @@ -2033,7 +2004,6 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "datafusion-common", "datafusion-doc", @@ -2049,7 +2019,6 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2058,7 +2027,6 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "datafusion-expr", "quote", @@ -2068,7 +2036,6 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "chrono", @@ -2086,7 +2053,6 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -2107,7 +2073,6 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -2120,7 +2085,6 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "datafusion-common", @@ -2138,7 +2102,6 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "ahash 0.8.11", "arrow", @@ -2170,7 +2133,6 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "chrono", @@ -2185,7 +2147,6 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "datafusion-common", @@ -2195,7 +2156,6 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "46.0.1" -source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3" dependencies = [ "arrow", "bigdecimal 0.4.8", @@ -4554,7 +4514,6 @@ dependencies = [ [[package]] name = "parquet" version = "54.2.1" -source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "aes-gcm", "ahash 0.8.11", diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml index e6a307ac53e3e..83834c7a7e827 100644 --- a/rust/cubestore/cubestore/Cargo.toml +++ b/rust/cubestore/cubestore/Cargo.toml @@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" } cubedatasketches = { path = "../cubedatasketches" } cubeshared = { path = "../../cubeshared" } cuberpc = { path = "../cuberpc" } -datafusion = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1", features = ["serde"] } -datafusion-datasource = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } -datafusion-proto = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } -datafusion-proto-common = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } +datafusion = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] } +datafusion-datasource = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" } +datafusion-proto = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" } +datafusion-proto-common = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" } csv = "1.1.3" bytes = "1.6.0" serde_json = "1.0.56" @@ -120,8 +120,8 @@ sasl2-sys = { version = "0.1.6", features = ["vendored"] } rdkafka = { version = "0.29.0", features = ["cmake-build"] } [target.'cfg(target_os = "macos")'.dependencies] -rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] } -sasl2-sys = { version = "0.1.6", features = ["vendored"] } +#rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] } +#sasl2-sys = { version = "0.1.6", features = ["vendored"] } [dev-dependencies] pretty_assertions = "0.7.1" diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs index ac70c8b948667..f378a7e376767 100644 --- a/rust/cubestore/cubestore/src/config/mod.rs +++ b/rust/cubestore/cubestore/src/config/mod.rs @@ -36,8 +36,8 @@ use crate::sql::{SqlService, SqlServiceImpl}; use crate::sql::{TableExtensionService, TableExtensionServiceImpl}; use crate::store::compaction::{CompactionService, CompactionServiceImpl}; use crate::store::{ChunkDataStore, ChunkStore, WALDataStore, WALStore}; -use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl}; -use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; +/* use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl}; +use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; */ use crate::table::parquet::{ CubestoreMetadataCacheFactory, CubestoreMetadataCacheFactoryImpl, CubestoreParquetMetadataCache, CubestoreParquetMetadataCacheImpl, @@ -2194,7 +2194,7 @@ impl Config { .register_typed::(async move |i| { ImportServiceImpl::new( i.get_service_typed().await, - i.get_service_typed().await, + //i.get_service_typed().await, i.get_service_typed().await, i.get_service_typed().await, i.get_service_typed().await, @@ -2210,31 +2210,31 @@ impl Config { }) .await; - self.injector - .register_typed::(async move |i| { - StreamingServiceImpl::new( - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed::() - .await - .cache_factory() - .clone(), - ) - }) - .await; + /* self.injector + .register_typed::(async move |i| { + StreamingServiceImpl::new( + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed::() + .await + .cache_factory() + .clone(), + ) + }) + .await; */ - self.injector - .register_typed::(async move |_| KsqlClientImpl::new()) - .await; + /* self.injector + .register_typed::(async move |_| KsqlClientImpl::new()) + .await; */ - self.injector - .register_typed::(async move |i| { - KafkaClientServiceImpl::new(i.get_service_typed().await) - }) - .await; + /* self.injector + .register_typed::(async move |i| { + KafkaClientServiceImpl::new(i.get_service_typed().await) + }) + .await; */ self.injector .register_typed::(async move |_| { diff --git a/rust/cubestore/cubestore/src/import/mod.rs b/rust/cubestore/cubestore/src/import/mod.rs index f994aeee54301..8a2c4b811504f 100644 --- a/rust/cubestore/cubestore/src/import/mod.rs +++ b/rust/cubestore/cubestore/src/import/mod.rs @@ -36,7 +36,7 @@ use crate::queryplanner::trace_data_loaded::DataLoadedSize; use crate::remotefs::RemoteFs; use crate::sql::timestamp_from_string; use crate::store::ChunkDataStore; -use crate::streaming::StreamingService; +//use crate::streaming::StreamingService; use crate::table::data::{append_row, create_array_builders}; use crate::table::{Row, TableValue}; use crate::util::batch_memory::columns_vec_buffer_size; @@ -517,7 +517,7 @@ crate::di_service!(MockImportService, [ImportService]); pub struct ImportServiceImpl { meta_store: Arc, - streaming_service: Arc, + //streaming_service: Arc, chunk_store: Arc, remote_fs: Arc, config_obj: Arc, @@ -530,7 +530,7 @@ crate::di_service!(ImportServiceImpl, [ImportService]); impl ImportServiceImpl { pub fn new( meta_store: Arc, - streaming_service: Arc, + //streaming_service: Arc, chunk_store: Arc, remote_fs: Arc, config_obj: Arc, @@ -539,7 +539,7 @@ impl ImportServiceImpl { ) -> Arc { Arc::new(ImportServiceImpl { meta_store, - streaming_service, + //streaming_service, chunk_store, remote_fs, config_obj, @@ -823,13 +823,13 @@ impl ImportService for ImportServiceImpl { table, location ))); } - if Table::is_stream_location(location) { + /* if Table::is_stream_location(location) { self.streaming_service.stream_table(table, location).await?; - } else { - self.do_import(&table, *format, location, data_loaded_size.clone()) - .await?; - self.drop_temp_uploads(&location).await?; - } + } else { */ + self.do_import(&table, *format, location, data_loaded_size.clone()) + .await?; + self.drop_temp_uploads(&location).await?; + //} Ok(()) } @@ -840,11 +840,11 @@ impl ImportService for ImportServiceImpl { location: &str, ) -> Result<(), CubeError> { let table = self.meta_store.get_table_by_id(table_id).await?; - if Table::is_stream_location(location) { + /* if Table::is_stream_location(location) { self.streaming_service .validate_table_location(table, location) .await?; - } + } */ Ok(()) } diff --git a/rust/cubestore/cubestore/src/lib.rs b/rust/cubestore/cubestore/src/lib.rs index bb9e124341848..c79c44fd4b2e7 100644 --- a/rust/cubestore/cubestore/src/lib.rs +++ b/rust/cubestore/cubestore/src/lib.rs @@ -44,7 +44,7 @@ pub mod scheduler; pub mod shared; pub mod sql; pub mod store; -pub mod streaming; +//pub mod streaming; pub mod sys; pub mod table; pub mod telemetry; diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs new file mode 100644 index 0000000000000..4da0b9b48f7b0 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs @@ -0,0 +1,611 @@ +use crate::cluster::{ + pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams, +}; +use crate::config::injection::DIService; +use crate::config::ConfigObj; +use crate::metastore::multi_index::MultiPartition; +use crate::metastore::table::Table; +use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; +use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; +use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; +use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; +use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; +use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags; +use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots}; +use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions}; +use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan}; +use crate::queryplanner::trace_data_loaded::DataLoadedSize; +use crate::store::DataFrame; +use crate::table::data::rows_to_columns; +use crate::table::parquet::CubestoreParquetMetadataCache; +use crate::table::{Row, TableValue, TimestampValue}; +use crate::telemetry::suboptimal_query_plan_event; +use crate::util::memory::MemoryHandler; +use crate::{app_metrics, CubeError}; +use async_trait::async_trait; +use core::fmt; +use datafusion::arrow::array::AsArray; +use datafusion::arrow::array::{ + make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array, + Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray, + TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, +}; +use datafusion::arrow::compute::SortOptions; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::arrow::ipc::reader::StreamReader; +use datafusion::arrow::ipc::writer::StreamWriter; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::Session; +use datafusion::common::ToDFSchema; +use datafusion::config::TableParquetOptions; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::object_store::ObjectStoreUrl; +use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer; +use datafusion::datasource::physical_plan::{ + FileScanConfig, ParquetFileReaderFactory, ParquetSource, +}; +use datafusion::datasource::{TableProvider, TableType}; +use datafusion::dfschema::internal_err; +use datafusion::dfschema::not_impl_err; +use datafusion::error::DataFusionError; +use datafusion::error::Result as DFResult; +use datafusion::execution::TaskContext; +use datafusion::logical_expr::{Expr, GroupsAccumulator, LogicalPlan}; +use datafusion::physical_expr::expressions::Column as DFColumn; +use datafusion::physical_expr::LexOrdering; +use datafusion::physical_expr::{self, GroupsAccumulatorAdapter}; +use datafusion::physical_expr::{ + Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, +}; +use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; +use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; +use datafusion::physical_optimizer::join_selection::JoinSelection; +use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; +use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; +use datafusion::physical_optimizer::output_requirements::OutputRequirements; +use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown; +use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; +use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; +use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; +use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::aggregates::*; +use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::empty::EmptyExec; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::projection::ProjectionExec; +use datafusion::physical_plan::sorts::sort::SortExec; +use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::udaf::AggregateFunctionExpr; +use datafusion::physical_plan::{ + collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, + PlanProperties, SendableRecordBatchStream, +}; +use datafusion::prelude::{and, SessionConfig, SessionContext}; +use datafusion_datasource::memory::MemorySourceConfig; +use datafusion_datasource::source::DataSourceExec; +use futures::ready; +use futures::{ + stream::{Stream, StreamExt}, + Future, +}; +use itertools::Itertools; +use log::{debug, error, trace, warn}; +use mockall::automock; +use serde_derive::{Deserialize, Serialize}; +use std::any::Any; +use std::cmp::min; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Formatter}; +use std::io::Cursor; +use std::mem::take; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::SystemTime; +use tarpc::context::current; +use tracing::{instrument, Instrument}; + +use super::InlineAggregateExec; +use super::InlineAggregateMode; + +#[derive(Debug, Clone)] +pub(crate) enum ExecutionState { + ReadingInput, + ProducingOutput(RecordBatch), + Done, +} + +pub(crate) struct InlineAggregateStream { + schema: SchemaRef, + input: SendableRecordBatchStream, + mode: InlineAggregateMode, + + aggregate_arguments: Vec>>, + + filter_expressions: Vec>>, + + group_by: PhysicalGroupBy, + + batch_size: usize, + + exec_state: ExecutionState, + + input_done: bool, + + accumulators: Vec>, + current_group_indices: Vec, +} + +impl InlineAggregateStream { + pub fn new( + agg: &InlineAggregateExec, + context: Arc, + partition: usize, + ) -> DFResult { + let agg_schema = Arc::clone(&agg.schema); + let agg_group_by = agg.group_by.clone(); + let agg_filter_expr = agg.filter_expr.clone(); + + let batch_size = context.session_config().batch_size(); + let input = agg.input.execute(partition, Arc::clone(&context))?; + + let aggregate_exprs = agg.aggr_expr.clone(); + + // arguments for each aggregate, one vec of expressions per + // aggregate + let aggregate_arguments = + aggregate_expressions(&agg.aggr_expr, &agg.mode, agg_group_by.num_group_exprs())?; + // arguments for aggregating spilled data is the same as the one for final aggregation + let merging_aggregate_arguments = aggregate_expressions( + &agg.aggr_expr, + &InlineAggregateMode::Final, + agg_group_by.num_group_exprs(), + )?; + + let filter_expressions = match agg.mode { + InlineAggregateMode::Partial => agg_filter_expr, + InlineAggregateMode::Final => { + vec![None; agg.aggr_expr.len()] + } + }; + + let accumulators: Vec<_> = aggregate_exprs + .iter() + .map(create_group_accumulator) + .collect::>()?; + + let group_schema = agg_group_by.group_schema(&agg.input().schema())?; + + let partial_agg_schema = create_schema( + &agg.input().schema(), + &agg_group_by, + &aggregate_exprs, + InlineAggregateMode::Partial, + )?; + + let partial_agg_schema = Arc::new(partial_agg_schema); + + let exec_state = ExecutionState::ReadingInput; + let current_group_indices = Vec::with_capacity(batch_size); + + Ok(InlineAggregateStream { + schema: agg_schema, + input, + mode: agg.mode, + accumulators, + aggregate_arguments, + filter_expressions, + group_by: agg_group_by, + exec_state, + batch_size, + current_group_indices, + input_done: false, + }) + } +} + +fn create_schema( + input_schema: &Schema, + group_by: &PhysicalGroupBy, + aggr_expr: &[Arc], + mode: InlineAggregateMode, +) -> DFResult { + let mut fields = Vec::with_capacity(group_by.num_output_exprs() + aggr_expr.len()); + fields.extend(group_by.output_fields(input_schema)?); + + match mode { + InlineAggregateMode::Partial => { + // in partial mode, the fields of the accumulator's state + for expr in aggr_expr { + fields.extend(expr.state_fields()?.iter().cloned()); + } + } + InlineAggregateMode::Final => { + // in final mode, the field with the final result of the accumulator + for expr in aggr_expr { + fields.push(expr.field()) + } + } + } + + Ok(Schema::new_with_metadata( + fields, + input_schema.metadata().clone(), + )) +} + +fn aggregate_expressions( + aggr_expr: &[Arc], + mode: &InlineAggregateMode, + col_idx_base: usize, +) -> DFResult>>> { + match mode { + InlineAggregateMode::Partial => Ok(aggr_expr + .iter() + .map(|agg| { + let mut result = agg.expressions(); + // Append ordering requirements to expressions' results. This + // way order sensitive aggregators can satisfy requirement + // themselves. + if let Some(ordering_req) = agg.order_bys() { + result.extend(ordering_req.iter().map(|item| Arc::clone(&item.expr))); + } + result + }) + .collect()), + InlineAggregateMode::Final => { + let mut col_idx_base = col_idx_base; + aggr_expr + .iter() + .map(|agg| { + let exprs = merge_expressions(col_idx_base, agg)?; + col_idx_base += exprs.len(); + Ok(exprs) + }) + .collect() + } + } +} + +fn merge_expressions( + index_base: usize, + expr: &AggregateFunctionExpr, +) -> DFResult>> { + expr.state_fields().map(|fields| { + fields + .iter() + .enumerate() + .map(|(idx, f)| Arc::new(DFColumn::new(f.name(), index_base + idx)) as _) + .collect() + }) +} + +pub(crate) fn create_group_accumulator( + agg_expr: &Arc, +) -> DFResult> { + if agg_expr.groups_accumulator_supported() { + agg_expr.create_groups_accumulator() + } else { + let agg_expr_captured = Arc::clone(agg_expr); + let factory = move || agg_expr_captured.create_accumulator(); + Ok(Box::new(GroupsAccumulatorAdapter::new(factory))) + } +} + +impl Stream for InlineAggregateStream { + type Item = DFResult; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + loop { + match &self.exec_state { + ExecutionState::ReadingInput => 'reading_input: { + match ready!(self.input.poll_next_unpin(cx)) { + // New batch to aggregate in partial aggregation operator + Some(Ok(batch)) if self.mode == InlineAggregateMode::Partial => { + /* let timer = elapsed_compute.timer(); + let input_rows = batch.num_rows(); + + // Do the grouping + self.group_aggregate_batch(batch)?; + + self.update_skip_aggregation_probe(input_rows); + + // If we can begin emitting rows, do so, + // otherwise keep consuming input + assert!(!self.input_done); + + // If the number of group values equals or exceeds the soft limit, + // emit all groups and switch to producing output + if self.hit_soft_group_limit() { + timer.done(); + self.set_input_done_and_produce_output()?; + // make sure the exec_state just set is not overwritten below + break 'reading_input; + } + + if let Some(to_emit) = self.group_ordering.emit_to() { + timer.done(); + if let Some(batch) = self.emit(to_emit, false)? { + + ExecutionState::ProducingOutput(batch); + }; + // make sure the exec_state just set is not overwritten below + break 'reading_input; + } + + self.emit_early_if_necessary()?; + + self.switch_to_skip_aggregation()?; + + timer.done(); */ + todo!() + } + + // New batch to aggregate in terminal aggregation operator + // (Final/FinalPartitioned/Single/SinglePartitioned) + Some(Ok(batch)) => { + /* let timer = elapsed_compute.timer(); + + // Make sure we have enough capacity for `batch`, otherwise spill + self.spill_previous_if_necessary(&batch)?; + + // Do the grouping + + + // If we can begin emitting rows, do so, + // otherwise keep consuming input + assert!(!self.input_done); + + // If the number of group values equals or exceeds the soft limit, + // emit all groups and switch to producing output + if self.hit_soft_group_limit() { + timer.done(); + self.set_input_done_and_produce_output()?; + // make sure the exec_state just set is not overwritten below + break 'reading_input; + } + + if let Some(to_emit) = self.group_ordering.emit_to() { + timer.done(); + if let Some(batch) = self.emit(to_emit, false)? { + self.exec_state = + ExecutionState::ProducingOutput(batch); + }; + // make sure the exec_state just set is not overwritten below + break 'reading_input; + } + + timer.done(); */ + todo!() + } + + // Found error from input stream + Some(Err(e)) => { + // inner had error, return to caller + return Poll::Ready(Some(Err(e))); + } + + // Found end from input stream + None => { + // inner is done, emit all rows and switch to producing output + //self.set_input_done_and_produce_output()?; + todo!() + } + } + } + + ExecutionState::ProducingOutput(batch) => { + // slice off a part of the batch, if needed + /* let output_batch; + let size = self.batch_size; + (self.exec_state, output_batch) = if batch.num_rows() <= size { + ( + if self.input_done { + ExecutionState::Done + } + // In Partial aggregation, we also need to check + // if we should trigger partial skipping + else if self.mode == AggregateMode::Partial + && self.should_skip_aggregation() + { + ExecutionState::SkippingAggregation + } else { + ExecutionState::ReadingInput + }, + batch.clone(), + ) + } else { + // output first batch_size rows + let size = self.batch_size; + let num_remaining = batch.num_rows() - size; + let remaining = batch.slice(size, num_remaining); + let output = batch.slice(0, size); + (ExecutionState::ProducingOutput(remaining), output) + }; + // Empty record batches should not be emitted. + // They need to be treated as [`Option`]es and handled separately + debug_assert!(output_batch.num_rows() > 0); + return Poll::Ready(Some(Ok( + output_batch.record_output(&self.baseline_metrics) + ))); */ + todo!() + } + + ExecutionState::Done => { + // release the memory reservation since sending back output batch itself needs + // some memory reservation, so make some room for it. + /* self.clear_all(); + let _ = self.update_memory_reservation(); */ + return Poll::Ready(None); + } + } + } + } +} + +impl InlineAggregateStream { + fn group_aggregate_batch(&mut self, batch: RecordBatch) -> DFResult<()> { + // Evaluate the grouping expressions + /* let group_by_values = evaluate_group_by(&self.group_by, &batch)?; + + // Evaluate the aggregation expressions. + let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; + + // Evaluate the filter expressions, if any, against the inputs + let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; + + for group_values in &group_by_values { + // calculate the group indices for each input row + let starting_num_groups = self.group_values.len(); + self.group_values + .intern(group_values, &mut self.current_group_indices)?; + let group_indices = &self.current_group_indices; + + // Update ordering information if necessary + /* let total_num_groups = self.group_values.len(); + if total_num_groups > starting_num_groups { + self.group_ordering + .new_groups(group_values, group_indices, total_num_groups)?; + } */ + + // Gather the inputs to call the actual accumulator + let t = self + .accumulators + .iter_mut() + .zip(input_values.iter()) + .zip(filter_values.iter()); + + for ((acc, values), opt_filter) in t { + let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean()); + + // Call the appropriate method on each aggregator with + // the entire input row and the relevant group indexes + match self.mode { + InlineAggregateMode::Partial => { + acc.update_batch(values, group_indices, opt_filter, total_num_groups)?; + } + _ => { + if opt_filter.is_some() { + return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage"); + } + + // if aggregation is over intermediate states, + // use merge + acc.merge_batch(values, group_indices, None, total_num_groups)?; + } + } + } + } */ + Ok(()) + } +} + +/// Evaluates expressions against a record batch. +fn evaluate(expr: &[Arc], batch: &RecordBatch) -> DFResult> { + expr.iter() + .map(|expr| { + expr.evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + }) + .collect() +} + +/// Evaluates expressions against a record batch. +fn evaluate_many( + expr: &[Vec>], + batch: &RecordBatch, +) -> DFResult>> { + expr.iter().map(|expr| evaluate(expr, batch)).collect() +} + +fn evaluate_optional( + expr: &[Option>], + batch: &RecordBatch, +) -> DFResult>> { + expr.iter() + .map(|expr| { + expr.as_ref() + .map(|expr| { + expr.evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + }) + .transpose() + }) + .collect() +} + +fn group_id_array(group: &[bool], batch: &RecordBatch) -> DFResult { + if group.len() > 64 { + return not_impl_err!("Grouping sets with more than 64 columns are not supported"); + } + let group_id = group.iter().fold(0u64, |acc, &is_null| { + (acc << 1) | if is_null { 1 } else { 0 } + }); + let num_rows = batch.num_rows(); + if group.len() <= 8 { + Ok(Arc::new(UInt8Array::from(vec![group_id as u8; num_rows]))) + } else if group.len() <= 16 { + Ok(Arc::new(UInt16Array::from(vec![group_id as u16; num_rows]))) + } else if group.len() <= 32 { + Ok(Arc::new(UInt32Array::from(vec![group_id as u32; num_rows]))) + } else { + Ok(Arc::new(UInt64Array::from(vec![group_id; num_rows]))) + } +} + +/// Evaluate a group by expression against a `RecordBatch` +/// +/// Arguments: +/// - `group_by`: the expression to evaluate +/// - `batch`: the `RecordBatch` to evaluate against +/// +/// Returns: A Vec of Vecs of Array of results +/// The outer Vec appears to be for grouping sets +/// The inner Vec contains the results per expression +/// The inner-inner Array contains the results per row +fn evaluate_group_by( + group_by: &PhysicalGroupBy, + batch: &RecordBatch, +) -> DFResult>> { + let exprs: Vec = group_by + .expr() + .iter() + .map(|(expr, _)| { + let value = expr.evaluate(batch)?; + value.into_array(batch.num_rows()) + }) + .collect::>>()?; + + let null_exprs: Vec = group_by + .null_expr() + .iter() + .map(|(expr, _)| { + let value = expr.evaluate(batch)?; + value.into_array(batch.num_rows()) + }) + .collect::>>()?; + + group_by + .groups() + .iter() + .map(|group| { + let mut group_values = Vec::with_capacity(group_by.num_group_exprs()); + group_values.extend(group.iter().enumerate().map(|(idx, is_null)| { + if *is_null { + Arc::clone(&null_exprs[idx]) + } else { + Arc::clone(&exprs[idx]) + } + })); + if !group_by.is_single() { + group_values.push(group_id_array(group, batch)?); + } + Ok(group_values) + }) + .collect() +} diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs new file mode 100644 index 0000000000000..6f78a7ce9b375 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs @@ -0,0 +1,281 @@ +mod inline_aggregate_stream; +mod sorted_group_values; +use crate::cluster::{ + pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams, +}; +use crate::config::injection::DIService; +use crate::config::ConfigObj; +use crate::metastore::multi_index::MultiPartition; +use crate::metastore::table::Table; +use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; +use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; +use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; +use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; +use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; +use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags; +use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots}; +use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions}; +use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan}; +use crate::queryplanner::trace_data_loaded::DataLoadedSize; +use crate::store::DataFrame; +use crate::table::data::rows_to_columns; +use crate::table::parquet::CubestoreParquetMetadataCache; +use crate::table::{Row, TableValue, TimestampValue}; +use crate::telemetry::suboptimal_query_plan_event; +use crate::util::memory::MemoryHandler; +use crate::{app_metrics, CubeError}; +use async_trait::async_trait; +use core::fmt; +use datafusion::arrow::array::{ + make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array, + Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray, + TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, +}; +use datafusion::arrow::compute::SortOptions; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::arrow::ipc::reader::StreamReader; +use datafusion::arrow::ipc::writer::StreamWriter; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::Session; +use datafusion::common::stats::Precision; +use datafusion::common::{Statistics, ToDFSchema}; +use datafusion::config::TableParquetOptions; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::object_store::ObjectStoreUrl; +use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer; +use datafusion::datasource::physical_plan::{ + FileScanConfig, ParquetFileReaderFactory, ParquetSource, +}; +use datafusion::datasource::{TableProvider, TableType}; +use datafusion::dfschema::{internal_err, not_impl_err}; +use datafusion::error::DataFusionError; +use datafusion::error::Result as DFResult; +use datafusion::execution::TaskContext; +use datafusion::logical_expr::{Expr, LogicalPlan}; +use datafusion::physical_expr; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; +use datafusion::physical_expr::LexOrdering; +use datafusion::physical_expr::{ + Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, +}; +use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; +use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; +use datafusion::physical_optimizer::join_selection::JoinSelection; +use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; +use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; +use datafusion::physical_optimizer::output_requirements::OutputRequirements; +use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown; +use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; +use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; +use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; +use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::empty::EmptyExec; +use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType}; +use datafusion::physical_plan::metrics::MetricsSet; +use datafusion::physical_plan::projection::ProjectionExec; +use datafusion::physical_plan::sorts::sort::SortExec; +use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{aggregates::*, InputOrderMode}; +use datafusion::physical_plan::{ + collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, + PlanProperties, SendableRecordBatchStream, +}; +use datafusion::prelude::{and, SessionConfig, SessionContext}; +use datafusion_datasource::memory::MemorySourceConfig; +use datafusion_datasource::source::DataSourceExec; +use futures_util::{stream, StreamExt, TryStreamExt}; +use itertools::Itertools; +use log::{debug, error, trace, warn}; +use mockall::automock; +use serde_derive::{Deserialize, Serialize}; +use std::any::Any; +use std::cmp::min; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Formatter}; +use std::io::Cursor; +use std::mem::take; +use std::sync::Arc; +use std::time::SystemTime; +use tracing::{instrument, Instrument}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum InlineAggregateMode { + Partial, + Final, +} + +#[derive(Debug, Clone)] +pub struct InlineAggregateExec { + mode: InlineAggregateMode, + /// Group by expressions + group_by: PhysicalGroupBy, + /// Aggregate expressions + aggr_expr: Vec>, + /// FILTER (WHERE clause) expression for each aggregate expression + filter_expr: Vec>>, + /// Set if the output of this aggregation is truncated by a upstream sort/limit clause + limit: Option, + /// Input plan, could be a partial aggregate or the input to the aggregate + pub input: Arc, + /// Schema after the aggregate is applied + schema: SchemaRef, + /// Input schema before any aggregation is applied. For partial aggregate this will be the + /// same as input.schema() but for the final aggregate it will be the same as the input + /// to the partial aggregate, i.e., partial and final aggregates have same `input_schema`. + /// We need the input schema of partial aggregate to be able to deserialize aggregate + /// expressions from protobuf for final aggregate. + pub input_schema: SchemaRef, + cache: PlanProperties, +} + +impl InlineAggregateExec { + pub fn try_new_from_aggregate(aggregate: &AggregateExec) -> Option { + if matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) { + return None; + } + let mode = match aggregate.mode() { + AggregateMode::Partial => InlineAggregateMode::Partial, + AggregateMode::Final => InlineAggregateMode::Final, + _ => return None, + }; + let group_by = aggregate.group_expr().clone(); + let aggr_expr = aggregate.aggr_expr().iter().cloned().collect(); + let filter_expr = aggregate.filter_expr().iter().cloned().collect(); + let limit = aggregate.limit().clone(); + let input = aggregate.input().clone(); + let schema = aggregate.schema().clone(); + let input_schema = aggregate.input_schema().clone(); + let cache = aggregate.cache().clone(); + Some(Self { + mode, + group_by, + aggr_expr, + filter_expr, + limit, + input, + schema, + input_schema, + cache, + }) + } + + pub fn input(&self) -> &Arc { + &self.input + } +} + +impl DisplayAs for InlineAggregateExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "InlineAggregateExec: mode={:?}", self.mode)?; + } + } + Ok(()) + } +} + +impl ExecutionPlan for InlineAggregateExec { + fn name(&self) -> &'static str { + "InlineAggregateExec" + } + + /// Return a reference to Any that can be used for down-casting + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn required_input_distribution(&self) -> Vec { + match &self.mode { + InlineAggregateMode::Partial => { + vec![Distribution::UnspecifiedDistribution] + } + InlineAggregateMode::Final => { + vec![Distribution::SinglePartition] + } + } + } + + fn required_input_ordering(&self) -> Vec> { + vec![] + } + + fn maintains_input_order(&self) -> Vec { + vec![true] + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DFResult> { + let result = Self { + mode: self.mode, + group_by: self.group_by.clone(), + aggr_expr: self.aggr_expr.clone(), + filter_expr: self.filter_expr.clone(), + limit: self.limit.clone(), + input: children[0].clone(), + schema: self.schema.clone(), + input_schema: self.input_schema.clone(), + cache: self.cache.clone(), + }; + Ok(Arc::new(result)) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> DFResult { + /* self.execute_typed(partition, context) + .map(|stream| stream.into()) */ + todo!() + } + + fn metrics(&self) -> Option { + None + } + + fn statistics(&self) -> DFResult { + let column_statistics = Statistics::unknown_column(&self.schema()); + // When the input row count is 0 or 1, we can adopt that statistic keeping its reliability. + // When it is larger than 1, we degrade the precision since it may decrease after aggregation. + let num_rows = if let Some(value) = self.input().statistics()?.num_rows.get_value() { + if *value > 1 { + self.input().statistics()?.num_rows.to_inexact() + } else if *value == 0 { + // Aggregation on an empty table creates a null row. + self.input() + .statistics()? + .num_rows + .add(&Precision::Exact(1)) + } else { + // num_rows = 1 case + self.input().statistics()?.num_rows + } + } else { + Precision::Absent + }; + Ok(Statistics { + num_rows, + column_statistics, + total_byte_size: Precision::Absent, + }) + } + + fn cardinality_effect(&self) -> CardinalityEffect { + CardinalityEffect::LowerEqual + } +} diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs new file mode 100644 index 0000000000000..815274af16a85 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -0,0 +1,304 @@ +use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn; + +use std::mem::{self, size_of}; + +use datafusion::arrow::array::{Array, ArrayRef, RecordBatch}; +use datafusion::arrow::compute::cast; +use datafusion::arrow::datatypes::{ + BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, Float64Type, + Int16Type, Int32Type, Int64Type, Int8Type, Schema, SchemaRef, StringViewType, + Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; +use datafusion::dfschema::internal_err; +use datafusion::dfschema::not_impl_err; +use datafusion::error::Result as DFResult; +use datafusion::physical_expr::binary_map::OutputType; +use datafusion::physical_plan::aggregates::group_values::multi_group_by::{ + ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder, +}; + +pub struct SortedGroupValues { + /// The output schema + schema: SchemaRef, + group_values: Vec>, + rows_inds: Vec, + equal_to_results: Vec, +} + +impl SortedGroupValues { + pub fn try_new(schema: SchemaRef) -> DFResult { + Ok(Self { + schema, + group_values: vec![], + rows_inds: vec![], + equal_to_results: vec![], + }) + } + + fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + /* let n_rows = cols[0].len(); + groups.clear(); + + if n_rows == 0 { + return Ok(()); + } + + let first_group_idx = self.make_new_group_if_needed(cols, 0); + groups.push(first_group_idx); + + if n_rows == 1 { + return Ok(()); + } + + if self.rows_inds.len() < n_rows { + let old_len = self.rows_inds.len(); + self.rows_inds.extend(old_len..n_rows); + } + + self.equal_to_results.fill(true); + self.equal_to_results.resize(n_rows - 1, true); + + let lhs_rows = &self.rows_inds[0..n_rows - 1]; + let rhs_rows = &self.rows_inds[1..n_rows]; + for (col_idx, group_col) in self.group_values.iter().enumerate() { + cols[col_idx].vectorized_equal_to( + lhs_rows, + &cols[col_idx], + rhs_rows, + &mut self.equal_to_results, + ); + } + println!("!!!!! AAAAAAAAAA"); + let mut current_group_idx = first_group_idx; + for i in 0..n_rows - 1 { + if !self.equal_to_results[i] { + for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[col_idx], i + 1); + } + current_group_idx = self.group_values[0].len() - 1; + } + groups.push(current_group_idx); + } + println!("!!!!! BBBBBBB"); + Ok(()) */ + Ok(()) + } + + fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { + let new_group_needed = if self.group_values[0].len() == 0 { + true + } else { + self.group_values.iter().enumerate().any(|(i, group_val)| { + !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) + }) + }; + if new_group_needed { + for (i, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[i], row); + } + } + self.group_values[0].len() - 1 + } +} + +/// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v +/// +/// Arguments: +/// `$v`: the vector to push the new builder into +/// `$nullable`: whether the input can contains nulls +/// `$t`: the primitive type of the builder +/// +macro_rules! instantiate_primitive { + ($v:expr, $nullable:expr, $t:ty, $data_type:ident) => { + if $nullable { + let b = PrimitiveGroupValueBuilder::<$t, true>::new($data_type.to_owned()); + $v.push(Box::new(b) as _) + } else { + let b = PrimitiveGroupValueBuilder::<$t, false>::new($data_type.to_owned()); + $v.push(Box::new(b) as _) + } + }; +} + +/* impl GroupValues for SortedGroupValues { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { + if self.group_values.is_empty() { + let mut v = Vec::with_capacity(cols.len()); + + for f in self.schema.fields().iter() { + let nullable = f.is_nullable(); + let data_type = f.data_type(); + match data_type { + &DataType::Int8 => { + instantiate_primitive!(v, nullable, Int8Type, data_type) + } + &DataType::Int16 => { + instantiate_primitive!(v, nullable, Int16Type, data_type) + } + &DataType::Int32 => { + instantiate_primitive!(v, nullable, Int32Type, data_type) + } + &DataType::Int64 => { + instantiate_primitive!(v, nullable, Int64Type, data_type) + } + &DataType::UInt8 => { + instantiate_primitive!(v, nullable, UInt8Type, data_type) + } + &DataType::UInt16 => { + instantiate_primitive!(v, nullable, UInt16Type, data_type) + } + &DataType::UInt32 => { + instantiate_primitive!(v, nullable, UInt32Type, data_type) + } + &DataType::UInt64 => { + instantiate_primitive!(v, nullable, UInt64Type, data_type) + } + &DataType::Float32 => { + instantiate_primitive!(v, nullable, Float32Type, data_type) + } + &DataType::Float64 => { + instantiate_primitive!(v, nullable, Float64Type, data_type) + } + &DataType::Date32 => { + instantiate_primitive!(v, nullable, Date32Type, data_type) + } + &DataType::Date64 => { + instantiate_primitive!(v, nullable, Date64Type, data_type) + } + &DataType::Time32(t) => match t { + TimeUnit::Second => { + instantiate_primitive!(v, nullable, Time32SecondType, data_type) + } + TimeUnit::Millisecond => { + instantiate_primitive!(v, nullable, Time32MillisecondType, data_type) + } + _ => {} + }, + &DataType::Time64(t) => match t { + TimeUnit::Microsecond => { + instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type) + } + TimeUnit::Nanosecond => { + instantiate_primitive!(v, nullable, Time64NanosecondType, data_type) + } + _ => {} + }, + &DataType::Timestamp(t, _) => match t { + TimeUnit::Second => { + instantiate_primitive!(v, nullable, TimestampSecondType, data_type) + } + TimeUnit::Millisecond => { + instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type) + } + TimeUnit::Microsecond => { + instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type) + } + TimeUnit::Nanosecond => { + instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type) + } + }, + &DataType::Decimal128(_, _) => { + instantiate_primitive! { + v, + nullable, + Decimal128Type, + data_type + } + } + &DataType::Utf8 => { + let b = ByteGroupValueBuilder::::new(OutputType::Utf8); + v.push(Box::new(b) as _) + } + &DataType::LargeUtf8 => { + let b = ByteGroupValueBuilder::::new(OutputType::Utf8); + v.push(Box::new(b) as _) + } + &DataType::Binary => { + let b = ByteGroupValueBuilder::::new(OutputType::Binary); + v.push(Box::new(b) as _) + } + &DataType::LargeBinary => { + let b = ByteGroupValueBuilder::::new(OutputType::Binary); + v.push(Box::new(b) as _) + } + &DataType::Utf8View => { + let b = ByteViewGroupValueBuilder::::new(); + v.push(Box::new(b) as _) + } + &DataType::BinaryView => { + let b = ByteViewGroupValueBuilder::::new(); + v.push(Box::new(b) as _) + } + dt => return not_impl_err!("{dt} not supported in GroupValuesColumn"), + } + } + self.group_values = v; + } + self.intern_impl(cols, groups) + } + + fn size(&self) -> usize { + let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum(); + group_values_size + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn len(&self) -> usize { + if self.group_values.is_empty() { + return 0; + } + + self.group_values[0].len() + } + + fn emit(&mut self) -> Result> { + /* let mut output = match emit_to { + EmitTo::All => { + let group_values = mem::take(&mut self.group_values); + debug_assert!(self.group_values.is_empty()); + + group_values + .into_iter() + .map(|v| v.build()) + .collect::>() + } + EmitTo::First(n) => { + let output = self + .group_values + .iter_mut() + .map(|v| v.take_n(n)) + .collect::>(); + + output + } + }; + + // TODO: Materialize dictionaries in group keys (#7647) + for (field, array) in self.schema.fields.iter().zip(&mut output) { + let expected = field.data_type(); + if let DataType::Dictionary(_, v) = expected { + let actual = array.data_type(); + if v.as_ref() != actual { + return Err(DataFusionError::Internal(format!( + "Converted group rows expected dictionary of {v} got {actual}" + ))); + } + *array = cast(array.as_ref(), expected)?; + } + } + + Ok(output) */ + todo!() + } + + fn clear_shrink(&mut self, batch: &RecordBatch) { + self.group_values.clear(); + self.rows_inds.clear(); + self.equal_to_results.clear(); + } +} */ diff --git a/rust/cubestore/cubestore/src/queryplanner/mod.rs b/rust/cubestore/cubestore/src/queryplanner/mod.rs index cd86bb5629346..464337c5bb5f3 100644 --- a/rust/cubestore/cubestore/src/queryplanner/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/mod.rs @@ -30,6 +30,7 @@ mod rolling; mod test_utils; pub mod udf_xirr; pub mod udfs; +mod inline_aggregate; use crate::cachestore::CacheStore; use crate::config::injection::DIService; @@ -310,7 +311,7 @@ impl QueryPlannerImpl { .execution .dont_parallelize_sort_preserving_merge_exec_inputs = true; config.options_mut().execution.batch_size = Self::EXECUTION_BATCH_SIZE; - config.options_mut().execution.parquet.split_row_group_reads = true; + config.options_mut().execution.parquet.split_row_group_reads = false; // TODO upgrade DF: build SessionContexts consistently let state = Self::minimal_session_state_from_final_config(config) diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs index 8478c5d67e497..fac32b4f8e63c 100644 --- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs +++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs @@ -43,7 +43,7 @@ use crate::queryplanner::topk::{ AggregateTopKExec, ClusterAggregateTopKLower, ClusterAggregateTopKUpper, }; use crate::queryplanner::{CubeTableLogical, InfoSchemaTableProvider, QueryPlan}; -use crate::streaming::topic_table_provider::TopicTableProvider; +//use crate::streaming::topic_table_provider::TopicTableProvider; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::expressions::Column; use datafusion::physical_plan::joins::{HashJoinExec, SortMergeJoinExec}; @@ -481,8 +481,6 @@ fn pp_source(t: Arc) -> String { .downcast_ref::() { "InfoSchemaQueryCacheTableProvider".to_string() - } else if let Some(_) = t.as_any().downcast_ref::() { - "TopicTableProvider".to_string() } else { panic!("unknown table provider"); } diff --git a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs index 47b5b9f7caa34..7072ad8c59180 100644 --- a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs +++ b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs @@ -486,7 +486,7 @@ impl QueryExecutorImpl { .with_target_partitions(2) .with_prefer_existing_sort(true) .with_round_robin_repartition(false); - config.options_mut().execution.parquet.split_row_group_reads = true; + config.options_mut().execution.parquet.split_row_group_reads = false; config.options_mut().optimizer.prefer_hash_join = false; // Redundant with the commented CoalesceBatches::new() line in `Self::optimizer_rules` config.options_mut().execution.coalesce_batches = false; @@ -1178,6 +1178,7 @@ impl ExecutionPlan for CubeTableExec { mut partition: usize, context: Arc, ) -> Result { + println!("!!! Table exec: {}, {}", self.partition_execs.len(), partition); let exec = self .partition_execs .iter() diff --git a/rust/cubestore/rust-toolchain.toml b/rust/cubestore/rust-toolchain.toml index 935f99e36558c..ad8132da3e1bc 100644 --- a/rust/cubestore/rust-toolchain.toml +++ b/rust/cubestore/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2024-10-30" +channel = "nightly-2025-08-01" components = ["rustfmt", "clippy"] profile = "minimal" From d14c4ad48903d429457da7e0dfbb1eee19bebe1e Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Mon, 20 Oct 2025 16:15:15 +0200 Subject: [PATCH 2/9] in work --- .../inline_aggregate/sorted_group_values.rs | 166 +++++++++--------- 1 file changed, 82 insertions(+), 84 deletions(-) diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index 815274af16a85..47ea718a804d5 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -27,82 +27,6 @@ pub struct SortedGroupValues { equal_to_results: Vec, } -impl SortedGroupValues { - pub fn try_new(schema: SchemaRef) -> DFResult { - Ok(Self { - schema, - group_values: vec![], - rows_inds: vec![], - equal_to_results: vec![], - }) - } - - fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { - /* let n_rows = cols[0].len(); - groups.clear(); - - if n_rows == 0 { - return Ok(()); - } - - let first_group_idx = self.make_new_group_if_needed(cols, 0); - groups.push(first_group_idx); - - if n_rows == 1 { - return Ok(()); - } - - if self.rows_inds.len() < n_rows { - let old_len = self.rows_inds.len(); - self.rows_inds.extend(old_len..n_rows); - } - - self.equal_to_results.fill(true); - self.equal_to_results.resize(n_rows - 1, true); - - let lhs_rows = &self.rows_inds[0..n_rows - 1]; - let rhs_rows = &self.rows_inds[1..n_rows]; - for (col_idx, group_col) in self.group_values.iter().enumerate() { - cols[col_idx].vectorized_equal_to( - lhs_rows, - &cols[col_idx], - rhs_rows, - &mut self.equal_to_results, - ); - } - println!("!!!!! AAAAAAAAAA"); - let mut current_group_idx = first_group_idx; - for i in 0..n_rows - 1 { - if !self.equal_to_results[i] { - for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { - group_value.append_val(&cols[col_idx], i + 1); - } - current_group_idx = self.group_values[0].len() - 1; - } - groups.push(current_group_idx); - } - println!("!!!!! BBBBBBB"); - Ok(()) */ - Ok(()) - } - - fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { - let new_group_needed = if self.group_values[0].len() == 0 { - true - } else { - self.group_values.iter().enumerate().any(|(i, group_val)| { - !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) - }) - }; - if new_group_needed { - for (i, group_value) in self.group_values.iter_mut().enumerate() { - group_value.append_val(&cols[i], row); - } - } - self.group_values[0].len() - 1 - } -} - /// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v /// /// Arguments: @@ -122,8 +46,17 @@ macro_rules! instantiate_primitive { }; } -/* impl GroupValues for SortedGroupValues { - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { +impl SortedGroupValues { + pub fn try_new(schema: SchemaRef) -> DFResult { + Ok(Self { + schema, + group_values: vec![], + rows_inds: vec![], + equal_to_results: vec![], + }) + } + + pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { if self.group_values.is_empty() { let mut v = Vec::with_capacity(cols.len()); @@ -231,7 +164,7 @@ macro_rules! instantiate_primitive { let b = ByteViewGroupValueBuilder::::new(); v.push(Box::new(b) as _) } - dt => return not_impl_err!("{dt} not supported in GroupValuesColumn"), + dt => return not_impl_err!("{dt} not supported in SortedGroupValues"), } } self.group_values = v; @@ -239,16 +172,16 @@ macro_rules! instantiate_primitive { self.intern_impl(cols, groups) } - fn size(&self) -> usize { + pub fn size(&self) -> usize { let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum(); group_values_size } - fn is_empty(&self) -> bool { + pub fn is_empty(&self) -> bool { self.len() == 0 } - fn len(&self) -> usize { + pub fn len(&self) -> usize { if self.group_values.is_empty() { return 0; } @@ -256,7 +189,7 @@ macro_rules! instantiate_primitive { self.group_values[0].len() } - fn emit(&mut self) -> Result> { + pub fn emit(&mut self) -> DFResult> { /* let mut output = match emit_to { EmitTo::All => { let group_values = mem::take(&mut self.group_values); @@ -301,4 +234,69 @@ macro_rules! instantiate_primitive { self.rows_inds.clear(); self.equal_to_results.clear(); } -} */ + + fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + /* let n_rows = cols[0].len(); + groups.clear(); + + if n_rows == 0 { + return Ok(()); + } + + let first_group_idx = self.make_new_group_if_needed(cols, 0); + groups.push(first_group_idx); + + if n_rows == 1 { + return Ok(()); + } + + if self.rows_inds.len() < n_rows { + let old_len = self.rows_inds.len(); + self.rows_inds.extend(old_len..n_rows); + } + + self.equal_to_results.fill(true); + self.equal_to_results.resize(n_rows - 1, true); + + let lhs_rows = &self.rows_inds[0..n_rows - 1]; + let rhs_rows = &self.rows_inds[1..n_rows]; + for (col_idx, group_col) in self.group_values.iter().enumerate() { + cols[col_idx].vectorized_equal_to( + lhs_rows, + &cols[col_idx], + rhs_rows, + &mut self.equal_to_results, + ); + } + println!("!!!!! AAAAAAAAAA"); + let mut current_group_idx = first_group_idx; + for i in 0..n_rows - 1 { + if !self.equal_to_results[i] { + for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[col_idx], i + 1); + } + current_group_idx = self.group_values[0].len() - 1; + } + groups.push(current_group_idx); + } + println!("!!!!! BBBBBBB"); + Ok(()) */ + Ok(()) + } + + fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { + let new_group_needed = if self.group_values[0].len() == 0 { + true + } else { + self.group_values.iter().enumerate().any(|(i, group_val)| { + !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) + }) + }; + if new_group_needed { + for (i, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[i], row); + } + } + self.group_values[0].len() - 1 + } +} From 50517a5e7773d0b1a318aa250b600b4b2082cff5 Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Mon, 20 Oct 2025 16:54:19 +0200 Subject: [PATCH 3/9] in work --- .../inline_aggregate/column_comparator.rs | 262 ++++++++++++++++++ .../src/queryplanner/inline_aggregate/mod.rs | 1 + 2 files changed, 263 insertions(+) create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs new file mode 100644 index 0000000000000..df4e0c12a4e73 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs @@ -0,0 +1,262 @@ +use datafusion::arrow::array::*; +use datafusion::arrow::datatypes::*; +use std::marker::PhantomData; + +/// Trait for comparing adjacent rows in an array to detect group boundaries. +/// Used in sorted group-by operations to efficiently find where groups change. +pub trait ColumnComparator: Send + Sync { + /// Compare adjacent rows in the column, updating `equal_results`. + /// + /// For each index i in 0..equal_results.len(): + /// - If equal_results[i] is true, compares row[i] with row[i+1] + /// - Sets equal_results[i] to false if rows differ (group boundary) + /// - Leaves equal_results[i] unchanged if already false (short-circuit) + fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]); +} + +/// Comparator for primitive types (integers, floats, decimals, dates, timestamps). +/// +/// Uses const generic NULLABLE parameter to eliminate null-checking overhead +/// for NOT NULL columns at compile time. +pub struct PrimitiveComparator +where + T::Native: PartialEq, + T: Send + Sync, +{ + _phantom: PhantomData, +} + +impl PrimitiveComparator +where + T::Native: PartialEq, + T: Send + Sync, +{ + pub fn new() -> Self { + Self { + _phantom: PhantomData, + } + } +} + +impl ColumnComparator + for PrimitiveComparator +where + T::Native: PartialEq, + T: Send + Sync, +{ + #[inline] + fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) { + let array = col.as_primitive::(); + + let values = array.values(); + + if NULLABLE { + // Nullable column - check if there are actually any nulls + if array.null_count() == 0 { + // Fast path: column is nullable but this batch has no nulls + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = values[i] == values[i + 1]; + } + } + } else { + // Slow path: need to check null bitmap + let nulls = array.nulls().expect("null_count > 0 but no nulls bitmap"); + for i in 0..equal_results.len() { + if equal_results[i] { + let null1 = nulls.is_null(i); + let null2 = nulls.is_null(i + 1); + + // Both must be null or both must be non-null with equal values + equal_results[i] = + (null1 == null2) && (null1 || values[i] == values[i + 1]); + } + } + } + } else { + // NOT NULL column - no null checks needed, compiler will optimize this aggressively + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = values[i] == values[i + 1]; + } + } + } + } +} + +/// Comparator for byte array types (Utf8, LargeUtf8, Binary, LargeBinary). +/// +/// Uses generic over ByteArrayType to handle both i32 and i64 offset variants. +pub struct ByteArrayComparator { + _phantom: PhantomData, +} + +impl ByteArrayComparator { + pub fn new() -> Self { + Self { + _phantom: PhantomData, + } + } +} + +impl ColumnComparator + for ByteArrayComparator +where + T::Native: PartialEq, +{ + #[inline] + fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) { + let array = col.as_bytes::(); + + if NULLABLE { + if array.null_count() == 0 { + // Fast path: no nulls in this batch + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = array.value(i) == array.value(i + 1); + } + } + } else { + // Use iterator which handles nulls efficiently + let mut iter1 = array.iter(); + let mut iter2 = array.iter().skip(1); + + for (i, (v1, v2)) in iter1.zip(iter2).enumerate() { + if equal_results[i] { + equal_results[i] = v1 == v2; + } + } + } + } else { + // NOT NULL column - direct value comparison + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = array.value(i) == array.value(i + 1); + } + } + } + } +} + +/// Comparator for ByteView types (Utf8View, BinaryView). +/// +/// ByteView arrays store short strings (<=12 bytes) inline, allowing fast comparison +/// of the view value before comparing full string data. +pub struct ByteViewComparator { + _phantom: PhantomData, +} + +impl ByteViewComparator { + pub fn new() -> Self { + Self { + _phantom: PhantomData, + } + } +} + +impl ColumnComparator + for ByteViewComparator +where + T::Native: PartialEq, +{ + #[inline] + fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) { + let array = col.as_byte_view::(); + + if NULLABLE { + if array.null_count() == 0 { + // Fast path: no nulls + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = array.value(i) == array.value(i + 1); + } + } + } else { + // Handle nulls via iterator + let mut iter1 = array.iter(); + let mut iter2 = array.iter().skip(1); + + for (i, (v1, v2)) in iter1.zip(iter2).enumerate() { + if equal_results[i] { + equal_results[i] = v1 == v2; + } + } + } + } else { + // NOT NULL column + for i in 0..equal_results.len() { + if equal_results[i] { + equal_results[i] = array.value(i) == array.value(i + 1); + } + } + } + } +} + +/// Instantiate a primitive comparator and push it into the vector. +/// +/// Handles const generic NULLABLE parameter based on field nullability. +#[macro_export] +macro_rules! instantiate_primitive_comparator { + ($v:expr, $nullable:expr, $t:ty) => { + if $nullable { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::PrimitiveComparator::< + $t, + true, + >::new(), + ) as _) + } else { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::PrimitiveComparator::< + $t, + false, + >::new(), + ) as _) + } + }; +} + +/// Instantiate a byte array comparator and push it into the vector. +#[macro_export] +macro_rules! instantiate_byte_array_comparator { + ($v:expr, $nullable:expr, $t:ty) => { + if $nullable { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::ByteArrayComparator::< + $t, + true, + >::new(), + ) as _) + } else { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::ByteArrayComparator::< + $t, + false, + >::new(), + ) as _) + } + }; +} + +/// Instantiate a byte view comparator and push it into the vector. +#[macro_export] +macro_rules! instantiate_byte_view_comparator { + ($v:expr, $nullable:expr, $t:ty) => { + if $nullable { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::ByteViewComparator::< + $t, + true, + >::new(), + ) as _) + } else { + $v.push(Box::new( + $crate::queryplanner::inline_aggregate::column_comparator::ByteViewComparator::< + $t, + false, + >::new(), + ) as _) + } + }; +} diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs index 6f78a7ce9b375..fca0f1d38019b 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs @@ -1,3 +1,4 @@ +mod column_comparator; mod inline_aggregate_stream; mod sorted_group_values; use crate::cluster::{ From 6ae25f6f2f57c6acfc2697530f91a4d82a8bddbe Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Mon, 20 Oct 2025 16:59:27 +0200 Subject: [PATCH 4/9] in work --- .../inline_aggregate/sorted_group_values.rs | 104 ++++++++++++------ 1 file changed, 73 insertions(+), 31 deletions(-) diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index 47ea718a804d5..f757544a8edb5 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -5,11 +5,12 @@ use std::mem::{self, size_of}; use datafusion::arrow::array::{Array, ArrayRef, RecordBatch}; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::{ - BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, Float64Type, - Int16Type, Int32Type, Int64Type, Int8Type, Schema, SchemaRef, StringViewType, - Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, - TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, - TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, + Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, + Schema, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, + Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, + UInt32Type, UInt64Type, UInt8Type, Utf8Type, }; use datafusion::dfschema::internal_err; use datafusion::dfschema::not_impl_err; @@ -19,11 +20,22 @@ use datafusion::physical_plan::aggregates::group_values::multi_group_by::{ ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder, }; +use crate::queryplanner::inline_aggregate::column_comparator::ColumnComparator; +use crate::{ + instantiate_byte_array_comparator, instantiate_byte_view_comparator, + instantiate_primitive_comparator, +}; + pub struct SortedGroupValues { /// The output schema schema: SchemaRef, + /// Group value builders for each grouping column group_values: Vec>, + /// Column comparators for detecting group boundaries + comparators: Vec>, + /// Reusable buffer for row indices (not currently used) rows_inds: Vec, + /// Reusable buffer for equality comparison results equal_to_results: Vec, } @@ -51,6 +63,7 @@ impl SortedGroupValues { Ok(Self { schema, group_values: vec![], + comparators: vec![], rows_inds: vec![], equal_to_results: vec![], }) @@ -59,77 +72,98 @@ impl SortedGroupValues { pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { if self.group_values.is_empty() { let mut v = Vec::with_capacity(cols.len()); + let mut comparators = Vec::with_capacity(cols.len()); for f in self.schema.fields().iter() { let nullable = f.is_nullable(); let data_type = f.data_type(); match data_type { &DataType::Int8 => { - instantiate_primitive!(v, nullable, Int8Type, data_type) + instantiate_primitive!(v, nullable, Int8Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Int8Type); } &DataType::Int16 => { - instantiate_primitive!(v, nullable, Int16Type, data_type) + instantiate_primitive!(v, nullable, Int16Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Int16Type); } &DataType::Int32 => { - instantiate_primitive!(v, nullable, Int32Type, data_type) + instantiate_primitive!(v, nullable, Int32Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Int32Type); } &DataType::Int64 => { - instantiate_primitive!(v, nullable, Int64Type, data_type) + instantiate_primitive!(v, nullable, Int64Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Int64Type); } &DataType::UInt8 => { - instantiate_primitive!(v, nullable, UInt8Type, data_type) + instantiate_primitive!(v, nullable, UInt8Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, UInt8Type); } &DataType::UInt16 => { - instantiate_primitive!(v, nullable, UInt16Type, data_type) + instantiate_primitive!(v, nullable, UInt16Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, UInt16Type); } &DataType::UInt32 => { - instantiate_primitive!(v, nullable, UInt32Type, data_type) + instantiate_primitive!(v, nullable, UInt32Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, UInt32Type); } &DataType::UInt64 => { - instantiate_primitive!(v, nullable, UInt64Type, data_type) + instantiate_primitive!(v, nullable, UInt64Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, UInt64Type); } &DataType::Float32 => { - instantiate_primitive!(v, nullable, Float32Type, data_type) + instantiate_primitive!(v, nullable, Float32Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Float32Type); } &DataType::Float64 => { - instantiate_primitive!(v, nullable, Float64Type, data_type) + instantiate_primitive!(v, nullable, Float64Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Float64Type); } &DataType::Date32 => { - instantiate_primitive!(v, nullable, Date32Type, data_type) + instantiate_primitive!(v, nullable, Date32Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Date32Type); } &DataType::Date64 => { - instantiate_primitive!(v, nullable, Date64Type, data_type) + instantiate_primitive!(v, nullable, Date64Type, data_type); + instantiate_primitive_comparator!(comparators, nullable, Date64Type); } &DataType::Time32(t) => match t { TimeUnit::Second => { - instantiate_primitive!(v, nullable, Time32SecondType, data_type) + instantiate_primitive!(v, nullable, Time32SecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, Time32SecondType); } TimeUnit::Millisecond => { - instantiate_primitive!(v, nullable, Time32MillisecondType, data_type) + instantiate_primitive!(v, nullable, Time32MillisecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, Time32MillisecondType); } _ => {} }, &DataType::Time64(t) => match t { TimeUnit::Microsecond => { - instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type) + instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, Time64MicrosecondType); } TimeUnit::Nanosecond => { - instantiate_primitive!(v, nullable, Time64NanosecondType, data_type) + instantiate_primitive!(v, nullable, Time64NanosecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, Time64NanosecondType); } _ => {} }, &DataType::Timestamp(t, _) => match t { TimeUnit::Second => { - instantiate_primitive!(v, nullable, TimestampSecondType, data_type) + instantiate_primitive!(v, nullable, TimestampSecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, TimestampSecondType); } TimeUnit::Millisecond => { - instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type) + instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, TimestampMillisecondType); } TimeUnit::Microsecond => { - instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type) + instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, TimestampMicrosecondType); } TimeUnit::Nanosecond => { - instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type) + instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type); + instantiate_primitive_comparator!(comparators, nullable, TimestampNanosecondType); } }, &DataType::Decimal128(_, _) => { @@ -139,35 +173,43 @@ impl SortedGroupValues { Decimal128Type, data_type } + instantiate_primitive_comparator!(comparators, nullable, Decimal128Type); } &DataType::Utf8 => { let b = ByteGroupValueBuilder::::new(OutputType::Utf8); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_array_comparator!(comparators, nullable, Utf8Type); } &DataType::LargeUtf8 => { let b = ByteGroupValueBuilder::::new(OutputType::Utf8); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_array_comparator!(comparators, nullable, LargeUtf8Type); } &DataType::Binary => { let b = ByteGroupValueBuilder::::new(OutputType::Binary); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_array_comparator!(comparators, nullable, BinaryType); } &DataType::LargeBinary => { let b = ByteGroupValueBuilder::::new(OutputType::Binary); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_array_comparator!(comparators, nullable, LargeBinaryType); } &DataType::Utf8View => { let b = ByteViewGroupValueBuilder::::new(); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_view_comparator!(comparators, nullable, StringViewType); } &DataType::BinaryView => { let b = ByteViewGroupValueBuilder::::new(); - v.push(Box::new(b) as _) + v.push(Box::new(b) as _); + instantiate_byte_view_comparator!(comparators, nullable, BinaryViewType); } dt => return not_impl_err!("{dt} not supported in SortedGroupValues"), } } self.group_values = v; + self.comparators = comparators; } self.intern_impl(cols, groups) } From 2ef63e94bc695220edfd6fec23c5568918171370 Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Mon, 20 Oct 2025 17:18:42 +0200 Subject: [PATCH 5/9] in work --- .../inline_aggregate/sorted_group_values.rs | 126 ++++++++++++------ 1 file changed, 86 insertions(+), 40 deletions(-) diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index f757544a8edb5..d47c47381b7b6 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -1,3 +1,4 @@ +use datafusion::logical_expr::EmitTo; use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn; use std::mem::{self, size_of}; @@ -6,15 +7,15 @@ use datafusion::arrow::array::{Array, ArrayRef, RecordBatch}; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::{ BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, - Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, - Schema, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, - Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, Utf8Type, + Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, Schema, + SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, + Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + Utf8Type, }; use datafusion::dfschema::internal_err; use datafusion::dfschema::not_impl_err; -use datafusion::error::Result as DFResult; +use datafusion::error::{DataFusionError, Result as DFResult}; use datafusion::physical_expr::binary_map::OutputType; use datafusion::physical_plan::aggregates::group_values::multi_group_by::{ ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder, @@ -129,41 +130,83 @@ impl SortedGroupValues { &DataType::Time32(t) => match t { TimeUnit::Second => { instantiate_primitive!(v, nullable, Time32SecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, Time32SecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + Time32SecondType + ); } TimeUnit::Millisecond => { instantiate_primitive!(v, nullable, Time32MillisecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, Time32MillisecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + Time32MillisecondType + ); } _ => {} }, &DataType::Time64(t) => match t { TimeUnit::Microsecond => { instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, Time64MicrosecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + Time64MicrosecondType + ); } TimeUnit::Nanosecond => { instantiate_primitive!(v, nullable, Time64NanosecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, Time64NanosecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + Time64NanosecondType + ); } _ => {} }, &DataType::Timestamp(t, _) => match t { TimeUnit::Second => { instantiate_primitive!(v, nullable, TimestampSecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, TimestampSecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + TimestampSecondType + ); } TimeUnit::Millisecond => { - instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, TimestampMillisecondType); + instantiate_primitive!( + v, + nullable, + TimestampMillisecondType, + data_type + ); + instantiate_primitive_comparator!( + comparators, + nullable, + TimestampMillisecondType + ); } TimeUnit::Microsecond => { - instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, TimestampMicrosecondType); + instantiate_primitive!( + v, + nullable, + TimestampMicrosecondType, + data_type + ); + instantiate_primitive_comparator!( + comparators, + nullable, + TimestampMicrosecondType + ); } TimeUnit::Nanosecond => { instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type); - instantiate_primitive_comparator!(comparators, nullable, TimestampNanosecondType); + instantiate_primitive_comparator!( + comparators, + nullable, + TimestampNanosecondType + ); } }, &DataType::Decimal128(_, _) => { @@ -231,8 +274,8 @@ impl SortedGroupValues { self.group_values[0].len() } - pub fn emit(&mut self) -> DFResult> { - /* let mut output = match emit_to { + fn emit(&mut self, emit_to: EmitTo) -> DFResult> { + let mut output = match emit_to { EmitTo::All => { let group_values = mem::take(&mut self.group_values); debug_assert!(self.group_values.is_empty()); @@ -253,7 +296,6 @@ impl SortedGroupValues { } }; - // TODO: Materialize dictionaries in group keys (#7647) for (field, array) in self.schema.fields.iter().zip(&mut output) { let expected = field.data_type(); if let DataType::Dictionary(_, v) = expected { @@ -267,24 +309,25 @@ impl SortedGroupValues { } } - Ok(output) */ - todo!() + Ok(output) } fn clear_shrink(&mut self, batch: &RecordBatch) { self.group_values.clear(); + self.comparators.clear(); self.rows_inds.clear(); self.equal_to_results.clear(); } fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { - /* let n_rows = cols[0].len(); + let n_rows = cols[0].len(); groups.clear(); if n_rows == 0 { return Ok(()); } + // Handle first row - compare with last group or create new group let first_group_idx = self.make_new_group_if_needed(cols, 0); groups.push(first_group_idx); @@ -292,28 +335,20 @@ impl SortedGroupValues { return Ok(()); } - if self.rows_inds.len() < n_rows { - let old_len = self.rows_inds.len(); - self.rows_inds.extend(old_len..n_rows); - } - - self.equal_to_results.fill(true); + // Prepare buffer for vectorized comparison self.equal_to_results.resize(n_rows - 1, true); + self.equal_to_results[..n_rows - 1].fill(true); - let lhs_rows = &self.rows_inds[0..n_rows - 1]; - let rhs_rows = &self.rows_inds[1..n_rows]; - for (col_idx, group_col) in self.group_values.iter().enumerate() { - cols[col_idx].vectorized_equal_to( - lhs_rows, - &cols[col_idx], - rhs_rows, - &mut self.equal_to_results, - ); + // Vectorized comparison: compare row[i] with row[i+1] for all columns + for (col, comparator) in cols.iter().zip(&self.comparators) { + comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]); } - println!("!!!!! AAAAAAAAAA"); + + // Build groups based on comparison results let mut current_group_idx = first_group_idx; for i in 0..n_rows - 1 { if !self.equal_to_results[i] { + // Group boundary detected - add new group for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { group_value.append_val(&cols[col_idx], i + 1); } @@ -321,24 +356,35 @@ impl SortedGroupValues { } groups.push(current_group_idx); } - println!("!!!!! BBBBBBB"); - Ok(()) */ + Ok(()) } + /// Compare the specified row with the last group and create a new group if different. + /// + /// This is used to handle the first row of a batch, which needs to be compared + /// with the last group from the previous batch to detect group boundaries across batches. + /// + /// Returns the group index for this row. fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { let new_group_needed = if self.group_values[0].len() == 0 { + // No groups yet - always create first group true } else { + // Compare with last group - if any column differs, need new group self.group_values.iter().enumerate().any(|(i, group_val)| { !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) }) }; + if new_group_needed { + // Add new group with values from this row for (i, group_value) in self.group_values.iter_mut().enumerate() { group_value.append_val(&cols[i], row); } } + + // Return index of the group (either newly created or existing last group) self.group_values[0].len() - 1 } } From db87d629a06ef5ec3c62dcd2591e7491598c913d Mon Sep 17 00:00:00 2001 From: Alexandr Romanenko Date: Mon, 20 Oct 2025 19:59:08 +0200 Subject: [PATCH 6/9] in work --- .../inline_aggregate_stream.rs | 299 +++++++++--------- .../src/queryplanner/inline_aggregate/mod.rs | 39 ++- .../inline_aggregate/sorted_group_values.rs | 2 +- .../inline_aggregate_rewriter.rs | 30 ++ .../src/queryplanner/optimizations/mod.rs | 6 + .../src/queryplanner/pretty_printers.rs | 13 + 6 files changed, 226 insertions(+), 163 deletions(-) create mode 100644 rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs index 4da0b9b48f7b0..fc29dc584b45d 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs @@ -7,6 +7,7 @@ use crate::metastore::multi_index::MultiPartition; use crate::metastore::table::Table; use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; +use crate::queryplanner::inline_aggregate::sorted_group_values::SortedGroupValues; use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; @@ -50,8 +51,8 @@ use datafusion::dfschema::internal_err; use datafusion::dfschema::not_impl_err; use datafusion::error::DataFusionError; use datafusion::error::Result as DFResult; -use datafusion::execution::TaskContext; -use datafusion::logical_expr::{Expr, GroupsAccumulator, LogicalPlan}; +use datafusion::execution::{RecordBatchStream, TaskContext}; +use datafusion::logical_expr::{EmitTo, Expr, GroupsAccumulator, LogicalPlan}; use datafusion::physical_expr::expressions::Column as DFColumn; use datafusion::physical_expr::LexOrdering; use datafusion::physical_expr::{self, GroupsAccumulatorAdapter}; @@ -135,6 +136,7 @@ pub(crate) struct InlineAggregateStream { input_done: bool, accumulators: Vec>, + group_values: SortedGroupValues, current_group_indices: Vec, } @@ -189,6 +191,7 @@ impl InlineAggregateStream { let exec_state = ExecutionState::ReadingInput; let current_group_indices = Vec::with_capacity(batch_size); + let group_values = SortedGroupValues::try_new(group_schema)?; Ok(InlineAggregateStream { schema: agg_schema, @@ -201,6 +204,7 @@ impl InlineAggregateStream { exec_state, batch_size, current_group_indices, + group_values, input_done: false, }) } @@ -303,144 +307,68 @@ impl Stream for InlineAggregateStream { ) -> Poll> { loop { match &self.exec_state { - ExecutionState::ReadingInput => 'reading_input: { + ExecutionState::ReadingInput => { match ready!(self.input.poll_next_unpin(cx)) { - // New batch to aggregate in partial aggregation operator - Some(Ok(batch)) if self.mode == InlineAggregateMode::Partial => { - /* let timer = elapsed_compute.timer(); - let input_rows = batch.num_rows(); - - // Do the grouping - self.group_aggregate_batch(batch)?; - - self.update_skip_aggregation_probe(input_rows); - - // If we can begin emitting rows, do so, - // otherwise keep consuming input - assert!(!self.input_done); - - // If the number of group values equals or exceeds the soft limit, - // emit all groups and switch to producing output - if self.hit_soft_group_limit() { - timer.done(); - self.set_input_done_and_produce_output()?; - // make sure the exec_state just set is not overwritten below - break 'reading_input; - } - - if let Some(to_emit) = self.group_ordering.emit_to() { - timer.done(); - if let Some(batch) = self.emit(to_emit, false)? { - - ExecutionState::ProducingOutput(batch); - }; - // make sure the exec_state just set is not overwritten below - break 'reading_input; - } - - self.emit_early_if_necessary()?; - - self.switch_to_skip_aggregation()?; - - timer.done(); */ - todo!() - } - - // New batch to aggregate in terminal aggregation operator - // (Final/FinalPartitioned/Single/SinglePartitioned) + // New input batch to aggregate Some(Ok(batch)) => { - /* let timer = elapsed_compute.timer(); - - // Make sure we have enough capacity for `batch`, otherwise spill - self.spill_previous_if_necessary(&batch)?; - - // Do the grouping - - - // If we can begin emitting rows, do so, - // otherwise keep consuming input - assert!(!self.input_done); - - // If the number of group values equals or exceeds the soft limit, - // emit all groups and switch to producing output - if self.hit_soft_group_limit() { - timer.done(); - self.set_input_done_and_produce_output()?; - // make sure the exec_state just set is not overwritten below - break 'reading_input; + // Aggregate the batch + if let Err(e) = self.group_aggregate_batch(batch) { + return Poll::Ready(Some(Err(e))); } - if let Some(to_emit) = self.group_ordering.emit_to() { - timer.done(); - if let Some(batch) = self.emit(to_emit, false)? { - self.exec_state = - ExecutionState::ProducingOutput(batch); - }; - // make sure the exec_state just set is not overwritten below - break 'reading_input; + // Try to emit a batch if we have enough groups + match self.emit_early_if_ready() { + Ok(Some(batch)) => { + self.exec_state = ExecutionState::ProducingOutput(batch); + } + Ok(None) => { + // Not enough groups yet, continue reading + } + Err(e) => { + return Poll::Ready(Some(Err(e))); + } } - - timer.done(); */ - todo!() } - // Found error from input stream + // Error from input stream Some(Err(e)) => { - // inner had error, return to caller return Poll::Ready(Some(Err(e))); } - // Found end from input stream + // Input stream exhausted - emit all remaining groups None => { - // inner is done, emit all rows and switch to producing output - //self.set_input_done_and_produce_output()?; - todo!() + self.input_done = true; + + match self.emit(EmitTo::All) { + Ok(Some(batch)) => { + self.exec_state = ExecutionState::ProducingOutput(batch); + } + Ok(None) => { + // No groups to emit, we're done + self.exec_state = ExecutionState::Done; + } + Err(e) => { + return Poll::Ready(Some(Err(e))); + } + } } } } ExecutionState::ProducingOutput(batch) => { - // slice off a part of the batch, if needed - /* let output_batch; - let size = self.batch_size; - (self.exec_state, output_batch) = if batch.num_rows() <= size { - ( - if self.input_done { - ExecutionState::Done - } - // In Partial aggregation, we also need to check - // if we should trigger partial skipping - else if self.mode == AggregateMode::Partial - && self.should_skip_aggregation() - { - ExecutionState::SkippingAggregation - } else { - ExecutionState::ReadingInput - }, - batch.clone(), - ) + let batch = batch.clone(); + + // Determine next state + self.exec_state = if self.input_done { + ExecutionState::Done } else { - // output first batch_size rows - let size = self.batch_size; - let num_remaining = batch.num_rows() - size; - let remaining = batch.slice(size, num_remaining); - let output = batch.slice(0, size); - (ExecutionState::ProducingOutput(remaining), output) + ExecutionState::ReadingInput }; - // Empty record batches should not be emitted. - // They need to be treated as [`Option`]es and handled separately - debug_assert!(output_batch.num_rows() > 0); - return Poll::Ready(Some(Ok( - output_batch.record_output(&self.baseline_metrics) - ))); */ - todo!() + + return Poll::Ready(Some(Ok(batch))); } ExecutionState::Done => { - // release the memory reservation since sending back output batch itself needs - // some memory reservation, so make some room for it. - /* self.clear_all(); - let _ = self.update_memory_reservation(); */ return Poll::Ready(None); } } @@ -449,9 +377,69 @@ impl Stream for InlineAggregateStream { } impl InlineAggregateStream { + /// Emit groups based on EmitTo strategy. + /// + /// Returns None if there are no groups to emit. + /// Emit groups based on EmitTo strategy. + /// + /// Returns None if there are no groups to emit. + fn emit(&mut self, emit_to: EmitTo) -> DFResult> { + if self.group_values.is_empty() { + return Ok(None); + } + + // Get group values arrays + let group_arrays = self.group_values.emit(emit_to)?; + + // Get aggregate arrays based on mode + let mut aggr_arrays = vec![]; + for acc in &mut self.accumulators { + match self.mode { + InlineAggregateMode::Partial => { + // Emit intermediate state + let state = acc.state(emit_to)?; + aggr_arrays.extend(state); + } + InlineAggregateMode::Final => { + // Emit final aggregated values + aggr_arrays.push(acc.evaluate(emit_to)?); + } + } + } + + // Combine group columns and aggregate columns + let mut columns = group_arrays; + columns.extend(aggr_arrays); + + let batch = RecordBatch::try_new(Arc::clone(&self.schema), columns)?; + + Ok(Some(batch)) + } + + /// Check if we have enough groups to emit a batch, keeping the last (potentially incomplete) group. + /// + /// For sorted aggregation, we emit batches of size batch_size when we have accumulated + /// more than batch_size groups. We always keep the last group as it may continue in the next input batch. + fn should_emit_early(&self) -> bool { + // Need at least (batch_size + 1) groups to emit batch_size and keep 1 + self.group_values.len() > self.batch_size + } + + /// Emit a batch of groups if we have enough accumulated, keeping the last group. + /// + /// Returns Some(batch) if emitted, None otherwise. + fn emit_early_if_ready(&mut self) -> DFResult> { + if !self.should_emit_early() { + return Ok(None); + } + + // Emit exactly batch_size groups, keeping the rest (including last incomplete group) + self.emit(EmitTo::First(self.batch_size)) + } + fn group_aggregate_batch(&mut self, batch: RecordBatch) -> DFResult<()> { // Evaluate the grouping expressions - /* let group_by_values = evaluate_group_by(&self.group_by, &batch)?; + let group_by_values = evaluate_group_by(&self.group_by, &batch)?; // Evaluate the aggregation expressions. let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; @@ -459,48 +447,39 @@ impl InlineAggregateStream { // Evaluate the filter expressions, if any, against the inputs let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; - for group_values in &group_by_values { - // calculate the group indices for each input row - let starting_num_groups = self.group_values.len(); - self.group_values - .intern(group_values, &mut self.current_group_indices)?; - let group_indices = &self.current_group_indices; - - // Update ordering information if necessary - /* let total_num_groups = self.group_values.len(); - if total_num_groups > starting_num_groups { - self.group_ordering - .new_groups(group_values, group_indices, total_num_groups)?; - } */ - - // Gather the inputs to call the actual accumulator - let t = self - .accumulators - .iter_mut() - .zip(input_values.iter()) - .zip(filter_values.iter()); - - for ((acc, values), opt_filter) in t { - let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean()); - - // Call the appropriate method on each aggregator with - // the entire input row and the relevant group indexes - match self.mode { - InlineAggregateMode::Partial => { - acc.update_batch(values, group_indices, opt_filter, total_num_groups)?; + assert_eq!(group_by_values.len(), 1, "Exactly 1 group value required"); + self.group_values + .intern(&group_by_values[0], &mut self.current_group_indices)?; + let group_indices = &self.current_group_indices; + + let total_num_groups = self.group_values.len(); + // Gather the inputs to call the actual accumulator + let t = self + .accumulators + .iter_mut() + .zip(input_values.iter()) + .zip(filter_values.iter()); + + for ((acc, values), opt_filter) in t { + let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean()); + + // Call the appropriate method on each aggregator with + // the entire input row and the relevant group indexes + match self.mode { + InlineAggregateMode::Partial => { + acc.update_batch(values, group_indices, opt_filter, total_num_groups)?; + } + _ => { + if opt_filter.is_some() { + return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage"); } - _ => { - if opt_filter.is_some() { - return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage"); - } - // if aggregation is over intermediate states, - // use merge - acc.merge_batch(values, group_indices, None, total_num_groups)?; - } + // if aggregation is over intermediate states, + // use merge + acc.merge_batch(values, group_indices, None, total_num_groups)?; } } - } */ + } Ok(()) } } @@ -609,3 +588,9 @@ fn evaluate_group_by( }) .collect() } + +impl RecordBatchStream for InlineAggregateStream { + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } +} diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs index fca0f1d38019b..208873256afae 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs @@ -131,19 +131,33 @@ pub struct InlineAggregateExec { /// expressions from protobuf for final aggregate. pub input_schema: SchemaRef, cache: PlanProperties, + required_input_ordering: Vec>, } impl InlineAggregateExec { + /// Try to create an InlineAggregateExec from a standard AggregateExec. + /// + /// Returns None if the aggregate cannot be converted (e.g., not sorted, uses grouping sets). pub fn try_new_from_aggregate(aggregate: &AggregateExec) -> Option { - if matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) { + // Only convert Sorted aggregates + if !matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) { return None; } + + // Only support Partial and Final modes let mode = match aggregate.mode() { AggregateMode::Partial => InlineAggregateMode::Partial, AggregateMode::Final => InlineAggregateMode::Final, _ => return None, }; + let group_by = aggregate.group_expr().clone(); + + // InlineAggregate doesn't support grouping sets (CUBE/ROLLUP/GROUPING SETS) + if !group_by.is_single() { + return None; + } + let aggr_expr = aggregate.aggr_expr().iter().cloned().collect(); let filter_expr = aggregate.filter_expr().iter().cloned().collect(); let limit = aggregate.limit().clone(); @@ -151,6 +165,8 @@ impl InlineAggregateExec { let schema = aggregate.schema().clone(); let input_schema = aggregate.input_schema().clone(); let cache = aggregate.cache().clone(); + let required_input_ordering = aggregate.required_input_ordering().clone(); + Some(Self { mode, group_by, @@ -161,9 +177,22 @@ impl InlineAggregateExec { schema, input_schema, cache, + required_input_ordering, }) } + pub fn mode(&self) -> &InlineAggregateMode { + &self.mode + } + + pub fn limit(&self) -> Option { + self.limit + } + + pub fn aggr_expr(&self) -> &[Arc] { + &self.aggr_expr + } + pub fn input(&self) -> &Arc { &self.input } @@ -206,7 +235,7 @@ impl ExecutionPlan for InlineAggregateExec { } fn required_input_ordering(&self) -> Vec> { - vec![] + self.required_input_ordering.clone() } fn maintains_input_order(&self) -> Vec { @@ -231,6 +260,7 @@ impl ExecutionPlan for InlineAggregateExec { schema: self.schema.clone(), input_schema: self.input_schema.clone(), cache: self.cache.clone(), + required_input_ordering: self.required_input_ordering.clone(), }; Ok(Arc::new(result)) } @@ -240,9 +270,8 @@ impl ExecutionPlan for InlineAggregateExec { partition: usize, context: Arc, ) -> DFResult { - /* self.execute_typed(partition, context) - .map(|stream| stream.into()) */ - todo!() + let stream = inline_aggregate_stream::InlineAggregateStream::new(self, context, partition)?; + Ok(Box::pin(stream)) } fn metrics(&self) -> Option { diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index d47c47381b7b6..f53331b45c5e9 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -274,7 +274,7 @@ impl SortedGroupValues { self.group_values[0].len() } - fn emit(&mut self, emit_to: EmitTo) -> DFResult> { + pub fn emit(&mut self, emit_to: EmitTo) -> DFResult> { let mut output = match emit_to { EmitTo::All => { let group_values = mem::take(&mut self.group_values); diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs new file mode 100644 index 0000000000000..3a81303a249f6 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs @@ -0,0 +1,30 @@ +use crate::queryplanner::inline_aggregate::InlineAggregateExec; +use datafusion::error::DataFusionError; +use datafusion::physical_plan::aggregates::AggregateExec; +use datafusion::physical_plan::ExecutionPlan; +use std::sync::Arc; + +/// Replace sorted AggregateExec node with InlineAggregateExec if possible. +/// +/// This is a single-node rewriter function designed to be used with `rewrite_physical_plan`. +/// It replaces standard hash-based aggregates with a more efficient sorted aggregation +/// implementation when: +/// - Input is sorted by grouping columns (InputOrderMode::Sorted) +/// - Mode is Partial or Final +/// - No grouping sets (CUBE/ROLLUP/GROUPING SETS) +/// +/// The InlineAggregateExec takes advantage of sorted input to: +/// - Avoid hash table overhead +/// - Enable streaming aggregation with bounded memory +/// - Process groups in order without buffering +pub fn replace_with_inline_aggregate( + plan: Arc, +) -> Result, DataFusionError> { + if let Some(agg) = plan.as_any().downcast_ref::() { + if let Some(inline_agg) = InlineAggregateExec::try_new_from_aggregate(agg) { + return Ok(Arc::new(inline_agg)); + } + } + + Ok(plan) +} diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs index 1367301d3aee0..0359e64c476db 100644 --- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs @@ -1,5 +1,6 @@ mod check_memory; mod distributed_partial_aggregate; +mod inline_aggregate_rewriter; pub mod rewrite_plan; pub mod rolling_optimizer; mod trace_data_loaded; @@ -10,6 +11,7 @@ use crate::queryplanner::optimizations::distributed_partial_aggregate::{ add_limit_to_workers, ensure_partition_merge, push_aggregate_to_workers, replace_suboptimal_merge_sorts, }; +use crate::queryplanner::optimizations::inline_aggregate_rewriter::replace_with_inline_aggregate; use crate::queryplanner::planning::CubeExtensionPlanner; use crate::queryplanner::pretty_printers::{pp_phys_plan_ext, PPOptions}; use crate::queryplanner::rolling::RollingWindowPlanner; @@ -141,6 +143,10 @@ fn pre_optimize_physical_plan( let p = rewrite_physical_plan(p, &mut |p| ensure_partition_merge_with_acceptable_parent(p))?; // Handles the root node case let p = ensure_partition_merge(p)?; + + // Replace sorted AggregateExec with InlineAggregateExec for better performance + let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?; + Ok(p) } diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs index fac32b4f8e63c..d9c353d1d1095 100644 --- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs +++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs @@ -28,6 +28,7 @@ use std::sync::Arc; use crate::queryplanner::check_memory::CheckMemoryExec; use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; +use crate::queryplanner::inline_aggregate::{InlineAggregateExec, InlineAggregateMode}; use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; use crate::queryplanner::panic::{PanicWorkerExec, PanicWorkerNode}; use crate::queryplanner::planning::{ClusterSendNode, Snapshot, WorkerExec}; @@ -604,6 +605,18 @@ fn pp_phys_plan_indented(p: &dyn ExecutionPlan, indent: usize, o: &PPOptions, ou if let Some(limit) = agg.limit() { *out += &format!(", limit: {}", limit) } + } else if let Some(agg) = a.downcast_ref::() { + let mode = match agg.mode() { + InlineAggregateMode::Partial => "Partial", + InlineAggregateMode::Final => "Final", + }; + *out += &format!("{}InlineAggregate", mode); + if o.show_aggregations { + *out += &format!(", aggs: {:?}", agg.aggr_expr()) + } + if let Some(limit) = agg.limit() { + *out += &format!(", limit: {}", limit) + } } else if let Some(l) = a.downcast_ref::() { *out += &format!("LocalLimit, n: {}", l.fetch()); } else if let Some(l) = a.downcast_ref::() { From 3b17c39b1929d09d81fd83e65e554fa53b4fedd6 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 30 Oct 2025 19:48:15 +0100 Subject: [PATCH 7/9] in work --- rust/cubestore/Cargo.lock | 15 ++ .../cubestore-sql-tests/src/tests.rs | 1 - rust/cubestore/cubestore/Cargo.toml | 12 +- rust/cubestore/cubestore/src/config/mod.rs | 6 +- rust/cubestore/cubestore/src/import/mod.rs | 24 +- rust/cubestore/cubestore/src/lib.rs | 2 +- .../inline_aggregate/column_comparator.rs | 1 + .../inline_aggregate_stream.rs | 7 +- .../src/queryplanner/inline_aggregate/mod.rs | 60 +++++ .../inline_aggregate/sorted_group_values.rs | 151 ++++++------ .../sorted_group_values_rows.rs | 228 ++++++++++++++++++ .../src/queryplanner/optimizations/mod.rs | 2 +- .../src/queryplanner/query_executor.rs | 1 - rust/cubestore/cubestore/src/sql/mod.rs | 1 + rust/cubestore/rust-toolchain.toml | 2 +- 15 files changed, 410 insertions(+), 103 deletions(-) create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock index 6a64ae7e5efe8..2bb93b2da9777 100644 --- a/rust/cubestore/Cargo.lock +++ b/rust/cubestore/Cargo.lock @@ -219,6 +219,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-arith", "arrow-array", @@ -238,6 +239,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -250,6 +252,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "ahash 0.8.11", "arrow-buffer", @@ -265,6 +268,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "bytes 1.10.1", "half 2.4.1", @@ -274,6 +278,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -293,6 +298,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-cast", @@ -307,6 +313,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-buffer", "arrow-schema", @@ -317,6 +324,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,6 +337,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -347,6 +356,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -358,6 +368,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -369,6 +380,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "serde", ] @@ -376,6 +388,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "ahash 0.8.11", "arrow-array", @@ -388,6 +401,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "arrow-array", "arrow-buffer", @@ -4514,6 +4528,7 @@ dependencies = [ [[package]] name = "parquet" version = "54.2.1" +source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631" dependencies = [ "aes-gcm", "ahash 0.8.11", diff --git a/rust/cubestore/cubestore-sql-tests/src/tests.rs b/rust/cubestore/cubestore-sql-tests/src/tests.rs index 253c1c170d0c5..7b1a5d1bbff67 100644 --- a/rust/cubestore/cubestore-sql-tests/src/tests.rs +++ b/rust/cubestore/cubestore-sql-tests/src/tests.rs @@ -8339,7 +8339,6 @@ async fn assert_limit_pushdown_using_search_string( .unwrap(); match &res.get_rows()[1].values()[2] { TableValue::String(s) => { - println!("!! plan {}", s); if let Some(ind) = expected_index { if s.find(ind).is_none() { return Err(format!( diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml index 83834c7a7e827..b7219248c3007 100644 --- a/rust/cubestore/cubestore/Cargo.toml +++ b/rust/cubestore/cubestore/Cargo.toml @@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" } cubedatasketches = { path = "../cubedatasketches" } cubeshared = { path = "../../cubeshared" } cuberpc = { path = "../cuberpc" } -datafusion = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] } -datafusion-datasource = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" } -datafusion-proto = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" } -datafusion-proto-common = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" } +datafusion = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] } +datafusion-datasource = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" } +datafusion-proto = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" } +datafusion-proto-common = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" } csv = "1.1.3" bytes = "1.6.0" serde_json = "1.0.56" @@ -120,8 +120,8 @@ sasl2-sys = { version = "0.1.6", features = ["vendored"] } rdkafka = { version = "0.29.0", features = ["cmake-build"] } [target.'cfg(target_os = "macos")'.dependencies] -#rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] } -#sasl2-sys = { version = "0.1.6", features = ["vendored"] } +rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] } +sasl2-sys = { version = "0.1.6", features = ["vendored"] } [dev-dependencies] pretty_assertions = "0.7.1" diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs index f378a7e376767..31c6bf4a9458d 100644 --- a/rust/cubestore/cubestore/src/config/mod.rs +++ b/rust/cubestore/cubestore/src/config/mod.rs @@ -36,8 +36,8 @@ use crate::sql::{SqlService, SqlServiceImpl}; use crate::sql::{TableExtensionService, TableExtensionServiceImpl}; use crate::store::compaction::{CompactionService, CompactionServiceImpl}; use crate::store::{ChunkDataStore, ChunkStore, WALDataStore, WALStore}; -/* use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl}; -use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; */ +use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl}; +use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; use crate::table::parquet::{ CubestoreMetadataCacheFactory, CubestoreMetadataCacheFactoryImpl, CubestoreParquetMetadataCache, CubestoreParquetMetadataCacheImpl, @@ -2194,7 +2194,7 @@ impl Config { .register_typed::(async move |i| { ImportServiceImpl::new( i.get_service_typed().await, - //i.get_service_typed().await, + i.get_service_typed().await, i.get_service_typed().await, i.get_service_typed().await, i.get_service_typed().await, diff --git a/rust/cubestore/cubestore/src/import/mod.rs b/rust/cubestore/cubestore/src/import/mod.rs index 8a2c4b811504f..f994aeee54301 100644 --- a/rust/cubestore/cubestore/src/import/mod.rs +++ b/rust/cubestore/cubestore/src/import/mod.rs @@ -36,7 +36,7 @@ use crate::queryplanner::trace_data_loaded::DataLoadedSize; use crate::remotefs::RemoteFs; use crate::sql::timestamp_from_string; use crate::store::ChunkDataStore; -//use crate::streaming::StreamingService; +use crate::streaming::StreamingService; use crate::table::data::{append_row, create_array_builders}; use crate::table::{Row, TableValue}; use crate::util::batch_memory::columns_vec_buffer_size; @@ -517,7 +517,7 @@ crate::di_service!(MockImportService, [ImportService]); pub struct ImportServiceImpl { meta_store: Arc, - //streaming_service: Arc, + streaming_service: Arc, chunk_store: Arc, remote_fs: Arc, config_obj: Arc, @@ -530,7 +530,7 @@ crate::di_service!(ImportServiceImpl, [ImportService]); impl ImportServiceImpl { pub fn new( meta_store: Arc, - //streaming_service: Arc, + streaming_service: Arc, chunk_store: Arc, remote_fs: Arc, config_obj: Arc, @@ -539,7 +539,7 @@ impl ImportServiceImpl { ) -> Arc { Arc::new(ImportServiceImpl { meta_store, - //streaming_service, + streaming_service, chunk_store, remote_fs, config_obj, @@ -823,13 +823,13 @@ impl ImportService for ImportServiceImpl { table, location ))); } - /* if Table::is_stream_location(location) { + if Table::is_stream_location(location) { self.streaming_service.stream_table(table, location).await?; - } else { */ - self.do_import(&table, *format, location, data_loaded_size.clone()) - .await?; - self.drop_temp_uploads(&location).await?; - //} + } else { + self.do_import(&table, *format, location, data_loaded_size.clone()) + .await?; + self.drop_temp_uploads(&location).await?; + } Ok(()) } @@ -840,11 +840,11 @@ impl ImportService for ImportServiceImpl { location: &str, ) -> Result<(), CubeError> { let table = self.meta_store.get_table_by_id(table_id).await?; - /* if Table::is_stream_location(location) { + if Table::is_stream_location(location) { self.streaming_service .validate_table_location(table, location) .await?; - } */ + } Ok(()) } diff --git a/rust/cubestore/cubestore/src/lib.rs b/rust/cubestore/cubestore/src/lib.rs index c79c44fd4b2e7..bb9e124341848 100644 --- a/rust/cubestore/cubestore/src/lib.rs +++ b/rust/cubestore/cubestore/src/lib.rs @@ -44,7 +44,7 @@ pub mod scheduler; pub mod shared; pub mod sql; pub mod store; -//pub mod streaming; +pub mod streaming; pub mod sys; pub mod table; pub mod telemetry; diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs index df4e0c12a4e73..8910861c17f3d 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs @@ -1,4 +1,5 @@ use datafusion::arrow::array::*; +use datafusion::arrow::buffer::BooleanBuffer; use datafusion::arrow::datatypes::*; use std::marker::PhantomData; diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs index fc29dc584b45d..56f732a716ab2 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs @@ -7,7 +7,6 @@ use crate::metastore::multi_index::MultiPartition; use crate::metastore::table::Table; use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; -use crate::queryplanner::inline_aggregate::sorted_group_values::SortedGroupValues; use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; @@ -71,6 +70,7 @@ use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::aggregates::group_values::GroupValues; use datafusion::physical_plan::aggregates::*; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::empty::EmptyExec; @@ -108,6 +108,7 @@ use std::time::SystemTime; use tarpc::context::current; use tracing::{instrument, Instrument}; +use super::new_sorted_group_values; use super::InlineAggregateExec; use super::InlineAggregateMode; @@ -136,7 +137,7 @@ pub(crate) struct InlineAggregateStream { input_done: bool, accumulators: Vec>, - group_values: SortedGroupValues, + group_values: Box, current_group_indices: Vec, } @@ -191,7 +192,7 @@ impl InlineAggregateStream { let exec_state = ExecutionState::ReadingInput; let current_group_indices = Vec::with_capacity(batch_size); - let group_values = SortedGroupValues::try_new(group_schema)?; + let group_values = new_sorted_group_values(group_schema)?; Ok(InlineAggregateStream { schema: agg_schema, diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs index 208873256afae..74866b34065c3 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs @@ -1,6 +1,11 @@ mod column_comparator; mod inline_aggregate_stream; mod sorted_group_values; +mod sorted_group_values_rows; + +pub use sorted_group_values::SortedGroupValues; +pub use sorted_group_values_rows::SortedGroupValuesRows; + use crate::cluster::{ pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams, }; @@ -72,6 +77,7 @@ use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::aggregates::group_values::GroupValues; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType}; @@ -309,3 +315,57 @@ impl ExecutionPlan for InlineAggregateExec { CardinalityEffect::LowerEqual } } + +/// Creates a new [`GroupValues`] implementation optimized for sorted input data +/// +/// Chooses between: +/// - [`SortedGroupValues`]: Fast column-based implementation for supported types +/// - [`SortedGroupValuesRows`]: Row-based fallback for all other types (Boolean, Struct, List, etc.) +pub fn new_sorted_group_values(schema: SchemaRef) -> DFResult> { + // Check if all fields are supported by the column-based implementation + if supported_schema(schema.as_ref()) { + Ok(Box::new(SortedGroupValues::try_new(schema)?)) + } else { + Ok(Box::new(SortedGroupValuesRows::try_new(schema)?)) + } +} + +/// Returns true if the schema is supported by [`SortedGroupValues`] (column-based implementation) +fn supported_schema(schema: &datafusion::arrow::datatypes::Schema) -> bool { + schema + .fields() + .iter() + .map(|f| f.data_type()) + .all(supported_type) +} + +/// Returns true if the data type is supported by [`SortedGroupValues`] +/// +/// Types not in this list will use the row-based [`SortedGroupValuesRows`] implementation +fn supported_type(data_type: &DataType) -> bool { + matches!( + *data_type, + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Float32 + | DataType::Float64 + | DataType::Decimal128(_, _) + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Binary + | DataType::LargeBinary + | DataType::Date32 + | DataType::Date64 + | DataType::Time32(_) + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Utf8View + | DataType::BinaryView + ) +} diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index f53331b45c5e9..d9064aaf9ce16 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -20,6 +20,7 @@ use datafusion::physical_expr::binary_map::OutputType; use datafusion::physical_plan::aggregates::group_values::multi_group_by::{ ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder, }; +use datafusion::physical_plan::aggregates::group_values::GroupValues; use crate::queryplanner::inline_aggregate::column_comparator::ColumnComparator; use crate::{ @@ -70,7 +71,78 @@ impl SortedGroupValues { }) } - pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + let n_rows = cols[0].len(); + groups.clear(); + + if n_rows == 0 { + return Ok(()); + } + + // Handle first row - compare with last group or create new group + let first_group_idx = self.make_new_group_if_needed(cols, 0); + groups.push(first_group_idx); + + if n_rows == 1 { + return Ok(()); + } + + // Prepare buffer for vectorized comparison + self.equal_to_results.resize(n_rows - 1, true); + self.equal_to_results[..n_rows - 1].fill(true); + + // Vectorized comparison: compare row[i] with row[i+1] for all columns + for (col, comparator) in cols.iter().zip(&self.comparators) { + comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]); + } + + // Build groups based on comparison results + let mut current_group_idx = first_group_idx; + for i in 0..n_rows - 1 { + if !self.equal_to_results[i] { + // Group boundary detected - add new group + for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[col_idx], i + 1); + } + current_group_idx = self.group_values[0].len() - 1; + } + groups.push(current_group_idx); + } + + Ok(()) + } + + /// Compare the specified row with the last group and create a new group if different. + /// + /// This is used to handle the first row of a batch, which needs to be compared + /// with the last group from the previous batch to detect group boundaries across batches. + /// + /// Returns the group index for this row. + fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { + let new_group_needed = if self.group_values[0].len() == 0 { + // No groups yet - always create first group + true + } else { + // Compare with last group - if any column differs, need new group + self.group_values.iter().enumerate().any(|(i, group_val)| { + !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) + }) + }; + + if new_group_needed { + // Add new group with values from this row + for (i, group_value) in self.group_values.iter_mut().enumerate() { + group_value.append_val(&cols[i], row); + } + } + + // Return index of the group (either newly created or existing last group) + self.group_values[0].len() - 1 + } +} + +impl GroupValues for SortedGroupValues { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { if self.group_values.is_empty() { let mut v = Vec::with_capacity(cols.len()); let mut comparators = Vec::with_capacity(cols.len()); @@ -257,16 +329,16 @@ impl SortedGroupValues { self.intern_impl(cols, groups) } - pub fn size(&self) -> usize { + fn size(&self) -> usize { let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum(); group_values_size } - pub fn is_empty(&self) -> bool { + fn is_empty(&self) -> bool { self.len() == 0 } - pub fn len(&self) -> usize { + fn len(&self) -> usize { if self.group_values.is_empty() { return 0; } @@ -274,7 +346,7 @@ impl SortedGroupValues { self.group_values[0].len() } - pub fn emit(&mut self, emit_to: EmitTo) -> DFResult> { + fn emit(&mut self, emit_to: EmitTo) -> DFResult> { let mut output = match emit_to { EmitTo::All => { let group_values = mem::take(&mut self.group_values); @@ -318,73 +390,4 @@ impl SortedGroupValues { self.rows_inds.clear(); self.equal_to_results.clear(); } - - fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { - let n_rows = cols[0].len(); - groups.clear(); - - if n_rows == 0 { - return Ok(()); - } - - // Handle first row - compare with last group or create new group - let first_group_idx = self.make_new_group_if_needed(cols, 0); - groups.push(first_group_idx); - - if n_rows == 1 { - return Ok(()); - } - - // Prepare buffer for vectorized comparison - self.equal_to_results.resize(n_rows - 1, true); - self.equal_to_results[..n_rows - 1].fill(true); - - // Vectorized comparison: compare row[i] with row[i+1] for all columns - for (col, comparator) in cols.iter().zip(&self.comparators) { - comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]); - } - - // Build groups based on comparison results - let mut current_group_idx = first_group_idx; - for i in 0..n_rows - 1 { - if !self.equal_to_results[i] { - // Group boundary detected - add new group - for (col_idx, group_value) in self.group_values.iter_mut().enumerate() { - group_value.append_val(&cols[col_idx], i + 1); - } - current_group_idx = self.group_values[0].len() - 1; - } - groups.push(current_group_idx); - } - - Ok(()) - } - - /// Compare the specified row with the last group and create a new group if different. - /// - /// This is used to handle the first row of a batch, which needs to be compared - /// with the last group from the previous batch to detect group boundaries across batches. - /// - /// Returns the group index for this row. - fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize { - let new_group_needed = if self.group_values[0].len() == 0 { - // No groups yet - always create first group - true - } else { - // Compare with last group - if any column differs, need new group - self.group_values.iter().enumerate().any(|(i, group_val)| { - !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row) - }) - }; - - if new_group_needed { - // Add new group with values from this row - for (i, group_value) in self.group_values.iter_mut().enumerate() { - group_value.append_val(&cols[i], row); - } - } - - // Return index of the group (either newly created or existing last group) - self.group_values[0].len() - 1 - } } diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs new file mode 100644 index 0000000000000..199cce192e587 --- /dev/null +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs @@ -0,0 +1,228 @@ +use datafusion::logical_expr::EmitTo; +use std::mem::{self, size_of}; + +use datafusion::arrow::array::{Array, ArrayRef, ListArray, RecordBatch, StructArray}; +use datafusion::arrow::compute::cast; +use datafusion::arrow::datatypes::{DataType, SchemaRef}; +use datafusion::arrow::row::{RowConverter, Rows, SortField}; +use datafusion::dfschema::internal_err; +use datafusion::error::{DataFusionError, Result as DFResult}; +use datafusion::physical_plan::aggregates::group_values::GroupValues; + +use std::sync::Arc; + +/// A [`GroupValues`] implementation optimized for sorted input data +/// +/// This is a specialized implementation for sorted data that: +/// - Does not use a hash table (unlike `GroupValuesRows`) +/// - Detects group boundaries by comparing adjacent rows +/// - Works for any data type including Boolean, Struct, List, etc. +/// +/// It uses the arrow-rs [`Rows`] format for efficient row-wise storage and comparison. +pub struct SortedGroupValuesRows { + /// The output schema + schema: SchemaRef, + + /// Converter for the group values + row_converter: RowConverter, + + /// The actual group by values, stored in arrow [`Row`] format. + /// `group_values[i]` holds the group value for group_index `i`. + /// + /// The row format is used to compare group keys quickly and store + /// them efficiently in memory. Quick comparison is especially + /// important for multi-column group keys. + /// + /// [`Row`]: arrow::row::Row + group_values: Option, + + /// Reused buffer to store rows + rows_buffer: Rows, +} + +impl SortedGroupValuesRows { + pub fn try_new(schema: SchemaRef) -> DFResult { + let row_converter = RowConverter::new( + schema + .fields() + .iter() + .map(|f| SortField::new(f.data_type().clone())) + .collect(), + )?; + + let starting_rows_capacity = 1000; + let starting_data_capacity = 64 * starting_rows_capacity; + let rows_buffer = + row_converter.empty_rows(starting_rows_capacity, starting_data_capacity); + + Ok(Self { + schema, + row_converter, + group_values: None, + rows_buffer, + }) + } + + fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + // Convert the group keys into the row format + self.rows_buffer.clear(); + self.row_converter.append(&mut self.rows_buffer, cols)?; + let n_rows = self.rows_buffer.num_rows(); + + groups.clear(); + + if n_rows == 0 { + return Ok(()); + } + + let mut group_values = match self.group_values.take() { + Some(group_values) => group_values, + None => self.row_converter.empty_rows(0, 0), + }; + + // Handle first row - compare with last group or create new group + let new_group_needed = if group_values.num_rows() == 0 { + // No groups yet - always create first group + true + } else { + // Compare with last group - if differs, need new group + let last_group_idx = group_values.num_rows() - 1; + group_values.row(last_group_idx) != self.rows_buffer.row(0) + }; + + if new_group_needed { + // Add new group with values from first row + group_values.push(self.rows_buffer.row(0)); + } + + let first_group_idx = group_values.num_rows() - 1; + groups.push(first_group_idx); + + if n_rows == 1 { + self.group_values = Some(group_values); + return Ok(()); + } + + // Build groups based on comparison of adjacent rows + let mut current_group_idx = first_group_idx; + for i in 0..n_rows - 1 { + // Compare row[i] with row[i+1] + if self.rows_buffer.row(i) != self.rows_buffer.row(i + 1) { + // Group boundary detected - add new group + group_values.push(self.rows_buffer.row(i + 1)); + current_group_idx = group_values.num_rows() - 1; + } + groups.push(current_group_idx); + } + + self.group_values = Some(group_values); + Ok(()) + } +} + +impl GroupValues for SortedGroupValuesRows { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> DFResult<()> { + self.intern_impl(cols, groups) + } + + fn size(&self) -> usize { + let group_values_size = self.group_values.as_ref().map(|v| v.size()).unwrap_or(0); + self.row_converter.size() + group_values_size + self.rows_buffer.size() + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn len(&self) -> usize { + self.group_values + .as_ref() + .map(|group_values| group_values.num_rows()) + .unwrap_or(0) + } + + fn emit(&mut self, emit_to: EmitTo) -> DFResult> { + let mut group_values = self + .group_values + .take() + .expect("Can not emit from empty rows"); + + let mut output = match emit_to { + EmitTo::All => { + let output = self.row_converter.convert_rows(&group_values)?; + group_values.clear(); + output + } + EmitTo::First(n) => { + let groups_rows = group_values.iter().take(n); + let output = self.row_converter.convert_rows(groups_rows)?; + // Clear out first n group keys by copying them to a new Rows. + let mut new_group_values = self.row_converter.empty_rows(0, 0); + for row in group_values.iter().skip(n) { + new_group_values.push(row); + } + std::mem::swap(&mut new_group_values, &mut group_values); + output + } + }; + + // Handle dictionary encoding for output + for (field, array) in self.schema.fields.iter().zip(&mut output) { + let expected = field.data_type(); + *array = + dictionary_encode_if_necessary(Arc::::clone(array), expected)?; + } + + self.group_values = Some(group_values); + Ok(output) + } + + fn clear_shrink(&mut self, _batch: &RecordBatch) { + self.group_values = self.group_values.take().map(|mut rows| { + rows.clear(); + rows + }); + } +} + +fn dictionary_encode_if_necessary( + array: ArrayRef, + expected: &DataType, +) -> DFResult { + match (expected, array.data_type()) { + (DataType::Struct(expected_fields), _) => { + let struct_array = array.as_any().downcast_ref::().unwrap(); + let arrays = expected_fields + .iter() + .zip(struct_array.columns()) + .map(|(expected_field, column)| { + dictionary_encode_if_necessary( + Arc::::clone(column), + expected_field.data_type(), + ) + }) + .collect::>>()?; + + Ok(Arc::new(StructArray::try_new( + expected_fields.clone(), + arrays, + struct_array.nulls().cloned(), + )?)) + } + (DataType::List(expected_field), &DataType::List(_)) => { + let list = array.as_any().downcast_ref::().unwrap(); + + Ok(Arc::new(ListArray::try_new( + Arc::::clone(expected_field), + list.offsets().clone(), + dictionary_encode_if_necessary( + Arc::::clone(list.values()), + expected_field.data_type(), + )?, + list.nulls().cloned(), + )?)) + } + (DataType::Dictionary(_, _), _) => Ok(cast(array.as_ref(), expected)?), + (_, _) => Ok(Arc::::clone(&array)), + } +} diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs index 0359e64c476db..5746261938e1f 100644 --- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs @@ -145,7 +145,7 @@ fn pre_optimize_physical_plan( let p = ensure_partition_merge(p)?; // Replace sorted AggregateExec with InlineAggregateExec for better performance - let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?; + //let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?; Ok(p) } diff --git a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs index 7072ad8c59180..ccb164a15a8a7 100644 --- a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs +++ b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs @@ -1178,7 +1178,6 @@ impl ExecutionPlan for CubeTableExec { mut partition: usize, context: Arc, ) -> Result { - println!("!!! Table exec: {}, {}", self.partition_execs.len(), partition); let exec = self .partition_execs .iter() diff --git a/rust/cubestore/cubestore/src/sql/mod.rs b/rust/cubestore/cubestore/src/sql/mod.rs index 2cacd97ffe6af..67d2fb351967e 100644 --- a/rust/cubestore/cubestore/src/sql/mod.rs +++ b/rust/cubestore/cubestore/src/sql/mod.rs @@ -2846,6 +2846,7 @@ mod tests { let mut bools = Vec::new(); for i in 0..1000 { bools.push(i % (batch + 1) == 0); + } let values = bools.into_iter().map(|b| format!("({})", b)).join(", "); diff --git a/rust/cubestore/rust-toolchain.toml b/rust/cubestore/rust-toolchain.toml index ad8132da3e1bc..935f99e36558c 100644 --- a/rust/cubestore/rust-toolchain.toml +++ b/rust/cubestore/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2025-08-01" +channel = "nightly-2024-10-30" components = ["rustfmt", "clippy"] profile = "minimal" From fb5799e1ac5f919f96e803faa38a81ee39390928 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 30 Oct 2025 19:50:58 +0100 Subject: [PATCH 8/9] in work --- .../queryplanner/inline_aggregate/column_comparator.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs index 8910861c17f3d..2e3c6bb50e07d 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs @@ -119,8 +119,8 @@ where } } else { // Use iterator which handles nulls efficiently - let mut iter1 = array.iter(); - let mut iter2 = array.iter().skip(1); + let iter1 = array.iter(); + let iter2 = array.iter().skip(1); for (i, (v1, v2)) in iter1.zip(iter2).enumerate() { if equal_results[i] { @@ -174,8 +174,8 @@ where } } else { // Handle nulls via iterator - let mut iter1 = array.iter(); - let mut iter2 = array.iter().skip(1); + let iter1 = array.iter(); + let iter2 = array.iter().skip(1); for (i, (v1, v2)) in iter1.zip(iter2).enumerate() { if equal_results[i] { From 5762d5c802718bfa59788841380e635712ee1399 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Fri, 31 Oct 2025 14:32:24 +0100 Subject: [PATCH 9/9] in work --- rust/cubestore/Cargo.lock | 26 +++ rust/cubestore/cubedatasketches/src/error.rs | 8 +- rust/cubestore/cubedatasketches/src/native.rs | 16 +- rust/cubestore/cubehll/src/error.rs | 8 +- rust/cubestore/cubehll/src/instance.rs | 156 +++++++++--------- rust/cubestore/cubehll/src/sketch.rs | 24 +-- .../cubestore-sql-tests/src/benches.rs | 36 ++-- .../cubestore-sql-tests/src/files.rs | 6 +- rust/cubestore/cubestore-sql-tests/src/lib.rs | 2 +- .../cubestore-sql-tests/src/multiproc.rs | 6 +- .../cubestore/cubestore-sql-tests/src/rows.rs | 2 +- .../cubestore-sql-tests/src/tests.rs | 108 ++++++------ .../cubestore-sql-tests/tests/cluster.rs | 2 +- .../cubestore-sql-tests/tests/migration.rs | 4 +- rust/cubestore/cubestore/Cargo.toml | 8 +- .../cubestore/benches/cachestore_queue.rs | 8 +- .../cubestore/cubestore/src/bin/cubestored.rs | 2 +- rust/cubestore/cubestore/src/config/mod.rs | 46 +++--- .../inline_aggregate/column_comparator.rs | 1 - .../inline_aggregate_stream.rs | 148 +---------------- .../src/queryplanner/inline_aggregate/mod.rs | 102 ++---------- .../inline_aggregate/sorted_group_values.rs | 7 +- .../sorted_group_values_rows.rs | 15 +- .../cubestore/src/queryplanner/mod.rs | 2 +- .../src/queryplanner/optimizations/mod.rs | 2 +- .../src/queryplanner/physical_plan_flags.rs | 10 +- .../src/queryplanner/pretty_printers.rs | 2 +- rust/cubestore/cubezetasketch/src/data.rs | 12 +- .../cubezetasketch/src/difference_encoding.rs | 10 +- rust/cubestore/cubezetasketch/src/encoding.rs | 27 ++- rust/cubestore/cubezetasketch/src/error.rs | 10 +- rust/cubestore/cubezetasketch/src/normal.rs | 18 +- rust/cubestore/cubezetasketch/src/sketch.rs | 28 ++-- rust/cubestore/cubezetasketch/src/sparse.rs | 30 ++-- rust/cubestore/cubezetasketch/src/state.rs | 20 +-- 35 files changed, 356 insertions(+), 556 deletions(-) diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock index 2bb93b2da9777..6f2149f1d840b 100644 --- a/rust/cubestore/Cargo.lock +++ b/rust/cubestore/Cargo.lock @@ -1723,6 +1723,7 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "datafusion" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "arrow-ipc", @@ -1775,6 +1776,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "async-trait", @@ -1793,6 +1795,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "async-trait", @@ -1813,6 +1816,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -1835,6 +1839,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "log", "tokio", @@ -1843,6 +1848,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "async-compression 0.4.17", @@ -1875,10 +1881,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" [[package]] name = "datafusion-execution" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "dashmap", @@ -1898,6 +1906,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "chrono", @@ -1917,6 +1926,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "datafusion-common", @@ -1928,6 +1938,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "arrow-buffer", @@ -1955,6 +1966,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -1974,6 +1986,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -1985,6 +1998,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "arrow-ord", @@ -2004,6 +2018,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "async-trait", @@ -2018,6 +2033,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "datafusion-common", "datafusion-doc", @@ -2033,6 +2049,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2041,6 +2058,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "datafusion-expr", "quote", @@ -2050,6 +2068,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "chrono", @@ -2067,6 +2086,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -2087,6 +2107,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -2099,6 +2120,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "datafusion-common", @@ -2116,6 +2138,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "ahash 0.8.11", "arrow", @@ -2147,6 +2170,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "chrono", @@ -2161,6 +2185,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "datafusion-common", @@ -2170,6 +2195,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "46.0.1" +source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a" dependencies = [ "arrow", "bigdecimal 0.4.8", diff --git a/rust/cubestore/cubedatasketches/src/error.rs b/rust/cubestore/cubedatasketches/src/error.rs index 1459d86f3dbaf..6ea4f5705a377 100644 --- a/rust/cubestore/cubedatasketches/src/error.rs +++ b/rust/cubestore/cubedatasketches/src/error.rs @@ -30,21 +30,21 @@ impl Display for DataSketchesError { impl DataSketchesError { pub fn new(message: Str) -> Self { - return Self { + Self { message: message.to_string(), - }; + } } } impl From for DataSketchesError { fn from(err: std::io::Error) -> Self { - return DataSketchesError::new(err); + DataSketchesError::new(err) } } #[cfg(not(target_os = "windows"))] impl From for DataSketchesError { fn from(err: dsrs::DataSketchesError) -> Self { - return DataSketchesError::new(err); + DataSketchesError::new(err) } } diff --git a/rust/cubestore/cubedatasketches/src/native.rs b/rust/cubestore/cubedatasketches/src/native.rs index 1cbec0c0ecf97..e379c43098767 100644 --- a/rust/cubestore/cubedatasketches/src/native.rs +++ b/rust/cubestore/cubedatasketches/src/native.rs @@ -37,22 +37,22 @@ impl Debug for HLLDataSketch { impl HLLDataSketch { pub fn read(data: &[u8]) -> Result { - return Ok(Self { + Ok(Self { instance: HLLSketch::deserialize(data)?, - }); + }) } pub fn cardinality(&self) -> u64 { - return self.instance.estimate().round() as u64; + self.instance.estimate().round() as u64 } pub fn get_lg_config_k(&self) -> u8 { - return self.instance.get_lg_config_k(); + self.instance.get_lg_config_k() } pub fn write(&self) -> Vec { // TODO(ovr): Better way? - self.instance.serialize().as_ref().iter().copied().collect() + self.instance.serialize().as_ref().to_vec() } } @@ -80,13 +80,13 @@ impl HLLUnionDataSketch { } pub fn get_lg_config_k(&self) -> u8 { - return self.instance.get_lg_config_k(); + self.instance.get_lg_config_k() } pub fn write(&self) -> Vec { let sketch = self.instance.sketch(HLLType::HLL_4); // TODO(ovr): Better way? - sketch.serialize().as_ref().iter().copied().collect() + sketch.serialize().as_ref().to_vec() } pub fn merge_with(&mut self, other: HLLDataSketch) -> Result<()> { @@ -107,6 +107,6 @@ impl HLLUnionDataSketch { // // This function is supposed to be exact, but it is not exact. - return 32 + k; + 32 + k } } diff --git a/rust/cubestore/cubehll/src/error.rs b/rust/cubestore/cubehll/src/error.rs index 428a00639ed0d..978bb2f114abf 100644 --- a/rust/cubestore/cubehll/src/error.rs +++ b/rust/cubestore/cubehll/src/error.rs @@ -14,20 +14,20 @@ impl Display for HllError { impl HllError { pub fn new(message: Str) -> HllError { - return HllError { + HllError { message: message.to_string(), - }; + } } } impl From for HllError { fn from(err: std::io::Error) -> Self { - return HllError::new(err); + HllError::new(err) } } impl From for HllError { fn from(err: serde_json::Error) -> Self { - return HllError::new(err); + HllError::new(err) } } diff --git a/rust/cubestore/cubehll/src/instance.rs b/rust/cubestore/cubehll/src/instance.rs index 62ff469805bea..a9149d87d74ee 100644 --- a/rust/cubestore/cubehll/src/instance.rs +++ b/rust/cubestore/cubehll/src/instance.rs @@ -36,16 +36,16 @@ pub const MAX_BUCKETS: u32 = 65536; impl HllInstance { pub fn new(num_buckets: u32) -> Result { assert!(num_buckets <= MAX_BUCKETS); - return Ok(HllInstance::Sparse(SparseHll::new(index_bit_length( + Ok(HllInstance::Sparse(SparseHll::new(index_bit_length( num_buckets, - )?)?)); + )?)?)) } pub fn num_buckets(&self) -> u32 { - return match self { + match self { Sparse(s) => number_of_buckets(s.index_bit_len), Dense(d) => number_of_buckets(d.index_bit_len), - }; + } } /// Callers must check that `num_buckets()` is the same for `self` and `other`. @@ -61,10 +61,10 @@ impl HllInstance { } pub fn index_bit_len(&self) -> u8 { - return match self { + match self { Sparse(s) => s.index_bit_len, Dense(d) => d.index_bit_len, - }; + } } /// Returns true iff `self.make_dense_if_necessary` has to be run. @@ -75,15 +75,15 @@ impl HllInstance { l.merge_with(r); // We need the make this call, but borrow checker won't let us use `self` here. // self.make_dense_if_necessary(); - return true; + true } (Dense(l), Sparse(r)) => { l.merge_with_sparse(r); - return false; + false } (l, Dense(r)) => { l.ensure_dense().merge_with(r); - return false; + false } } } @@ -122,7 +122,7 @@ impl HllInstance { "Cannot read HLL with undefined encoding".to_string(), )) } - n if 1 <= n && n <= 4 => n, + n if (1..=4).contains(&n) => n, n => { return Err(HllError::new(format!( "Unknown HLL encoding ordinal: {}", @@ -131,7 +131,7 @@ impl HllInstance { } }; let reg_width = 1 + ((data[1] & 0b11100000) >> 5); - if reg_width < 1 || 6 < reg_width { + if !(1..=6).contains(®_width) { return Err(HllError::new(format!( "Register width must be between 1 and 6, got {}", reg_width @@ -139,7 +139,7 @@ impl HllInstance { } let log_num_buckets = data[1] & 0b00011111; // Note: the upper limit in storage spec is 31, but our implementation is limited to 16. - if log_num_buckets < 4 || 16 < log_num_buckets { + if !(4..=16).contains(&log_num_buckets) { return Err(HllError::new(format!( "Log2m must be between 4 and 16, got {}", log_num_buckets @@ -158,7 +158,7 @@ impl HllInstance { data.len() ))); } - return HllInstance::new(num_buckets); + HllInstance::new(num_buckets) } ENC_EXPLICIT => { if data.len() % 8 != 0 { @@ -216,11 +216,11 @@ impl HllInstance { values.push(zeroes as u8); } - return Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values( + Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values( log_num_buckets, indices, &values, - )?)); + )?)) } ENC_SPARSE => { let mut cursor = BitCursor::new(data); @@ -231,11 +231,11 @@ impl HllInstance { indices.push((e >> reg_width) as u32); values.push((e & ((1 << reg_width) - 1)) as u8); } - return Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values( + Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values( log_num_buckets, indices, &values, - )?)); + )?)) } ENC_FULL => { let expected_bits = num_buckets * reg_width as u32; @@ -253,10 +253,10 @@ impl HllInstance { for _ in 0..num_buckets { values.push(cursor.read_bits(reg_width as usize).unwrap() as u8) } - return Ok(HllInstance::Dense(DenseHll::new_from_entries( + Ok(HllInstance::Dense(DenseHll::new_from_entries( log_num_buckets, values, - )?)); + )?)) } enc => panic!("Unhandled encoding ordinal {}", enc), } @@ -306,19 +306,19 @@ impl HllInstance { if data.is_empty() { return Err(HllError::new("hll input data is empty")); } - return match data[0] { + match data[0] { TAG_SPARSE_V2 => Ok(HllInstance::Sparse(SparseHll::read(&data[1..])?)), TAG_DENSE_V1 => Ok(HllInstance::Dense(DenseHll::read_v1(&data[1..])?)), TAG_DENSE_V2 => Ok(HllInstance::Dense(DenseHll::read(&data[1..])?)), _ => Err(HllError::new(format!("invalid hll format tag {}", data[0]))), - }; + } } pub fn write(&self) -> Vec { - return match self { + match self { Sparse(s) => s.write(), Dense(s) => s.write(), - }; + } } fn ensure_dense(&mut self) -> &mut DenseHll { @@ -379,10 +379,10 @@ impl SparseHll { pub fn new(index_bit_len: u8) -> Result { SparseHll::is_valid_bit_len(index_bit_len)?; - return Ok(SparseHll { + Ok(SparseHll { index_bit_len, entries: Vec::with_capacity(1), - }); + }) } fn new_from_indices_and_values( @@ -419,8 +419,7 @@ impl SparseHll { } // Sort by bucket index. - entries - .sort_unstable_by(|l, r| (l >> (32 - index_bit_len)).cmp(&(r >> (32 - index_bit_len)))); + entries.sort_unstable_by_key(|l| l >> (32 - index_bit_len)); Ok(SparseHll { index_bit_len, @@ -442,10 +441,10 @@ impl SparseHll { if c.position() != data.len() as u64 { return Err(HllError::new("input is too big")); } - return Ok(SparseHll { + Ok(SparseHll { index_bit_len, entries, - }); + }) } pub fn write(&self) -> Vec { @@ -459,7 +458,7 @@ impl SparseHll { for e in &self.entries { r.write_u32::(*e).unwrap(); } - return r; + r } pub fn cardinality(&self) -> u64 { @@ -468,7 +467,7 @@ impl SparseHll { // while in the sparse regime. let total_buckets = number_of_buckets(SparseHll::EXTENDED_PREFIX_BITS); let zero_buckets = total_buckets - self.entries.len() as u32; - return linear_counting(zero_buckets, total_buckets).round() as u64; + linear_counting(zero_buckets, total_buckets).round() as u64 } pub fn merge_with(&mut self, o: &SparseHll) { @@ -479,11 +478,11 @@ impl SparseHll { // TODO: this can panic if Sparse HLL had too much precision. let mut d = DenseHll::new(self.index_bit_len); self.each_bucket(|bucket, zeros| d.insert(bucket, zeros)); - return d; + d } fn estimate_in_memory_size(&self) -> usize { - return size_of::() + 32 * self.entries.capacity(); + size_of::() + 32 * self.entries.capacity() } fn each_bucket(&self, mut f: F) @@ -555,27 +554,27 @@ impl SparseHll { } result.resize(index, 0); - return result; + result } fn encode_entry(bucket_index: u32, value: u8) -> u32 { - return (bucket_index << SparseHll::VALUE_BITS) | value as u32; + (bucket_index << SparseHll::VALUE_BITS) | value as u32 } fn decode_bucket_value(entry: u32) -> u8 { - return (entry & SparseHll::VALUE_MASK) as u8; + (entry & SparseHll::VALUE_MASK) as u8 } fn decode_bucket_index(entry: u32) -> u32 { - return SparseHll::decode_bucket_index_with_bit_len(SparseHll::EXTENDED_PREFIX_BITS, entry); + SparseHll::decode_bucket_index_with_bit_len(SparseHll::EXTENDED_PREFIX_BITS, entry) } fn decode_bucket_index_with_bit_len(index_bit_len: u8, entry: u32) -> u32 { - return entry >> (32 - index_bit_len); + entry >> (32 - index_bit_len) } fn is_valid_bit_len(index_bit_len: u8) -> Result<()> { - if 1 <= index_bit_len && index_bit_len <= SparseHll::EXTENDED_PREFIX_BITS { + if (1..=SparseHll::EXTENDED_PREFIX_BITS).contains(&index_bit_len) { Ok(()) } else { Err(HllError::new(format!( @@ -615,15 +614,15 @@ impl DenseHll { pub fn new(index_bit_len: u8) -> DenseHll { DenseHll::is_valid_bit_len(index_bit_len).unwrap(); - let num_buckets = number_of_buckets(index_bit_len) as u32; - return DenseHll { + let num_buckets = number_of_buckets(index_bit_len); + DenseHll { index_bit_len, baseline: 0, baseline_count: num_buckets, deltas: vec![0; (num_buckets * DenseHll::BITS_PER_BUCKET / 8) as usize], overflow_buckets: Vec::new(), overflow_values: Vec::new(), - }; + } } pub fn new_from_entries(index_bit_len: u8, values: Vec) -> Result { @@ -674,9 +673,9 @@ impl DenseHll { pub fn read_v1(_data: &[u8]) -> Result { // TODO: implement this for completeness. Airlift can read Dense HLL in V1 format. - return Err(HllError::new( + Err(HllError::new( "reading of v1 dense sketches is not implemented", - )); + )) } pub fn read(data: &[u8]) -> Result { @@ -725,14 +724,14 @@ impl DenseHll { } } - return Ok(DenseHll { + Ok(DenseHll { index_bit_len, baseline, baseline_count, deltas, overflow_buckets, overflow_values, - }); + }) } pub fn write(&self) -> Vec { @@ -753,7 +752,7 @@ impl DenseHll { r.write_u16::(e.try_into().unwrap()).unwrap(); } r.extend_from_slice(&of_values); - return r; + r } pub fn cardinality(&self) -> u64 { @@ -774,7 +773,7 @@ impl DenseHll { } let estimate = (alpha(self.index_bit_len) * num_buckets as f64 * num_buckets as f64) / sum; - return self.correct_bias(estimate).round() as u64; + self.correct_bias(estimate).round() as u64 } pub fn merge_with_sparse(&mut self, other: &SparseHll) { @@ -819,14 +818,14 @@ impl DenseHll { if delta1 == DenseHll::MAX_DELTA { overflow_entry = self.find_overflow_entry(bucket); if let Some(oe) = overflow_entry { - value1 += self.overflow_values[oe] as u8; + value1 += self.overflow_values[oe]; } } else { overflow_entry = None } if delta2 == DenseHll::MAX_DELTA { - value2 += other.get_overflow(bucket) as u8; + value2 += other.get_overflow(bucket); } let new_value = max(value1, value2); @@ -843,7 +842,7 @@ impl DenseHll { bucket += 1; } - self.deltas[i] = new_slot as u8; + self.deltas[i] = new_slot; } self.baseline = new_baseline as u8; @@ -919,15 +918,14 @@ impl DenseHll { bias = (((raw_estimate - x0) * (y1 - y0)) / (x1 - x0)) + y0; } - return raw_estimate - bias; + raw_estimate - bias } fn find_overflow_entry(&self, bucket: u32) -> Option { - return self - .overflow_buckets + self.overflow_buckets .iter() .find_position(|x| **x == bucket) - .map(|x| x.0); + .map(|x| x.0) } fn adjust_baseline_if_needed(&mut self) { @@ -987,7 +985,7 @@ impl DenseHll { } else if let Some(oe) = overflow_entry { self.remove_overflow(oe); } - return delta as u8; + delta } fn add_overflow(&mut self, bucket: u32, overflow: u8) { @@ -1020,7 +1018,7 @@ impl DenseHll { if delta == DenseHll::MAX_DELTA as u32 { delta += self.get_overflow(bucket) as u32; } - return self.baseline as u32 + delta; + self.baseline as u32 + delta } fn get_overflow(&self, bucket: u32) -> u8 { @@ -1029,41 +1027,41 @@ impl DenseHll { return self.overflow_values[i]; } } - return 0; + 0 } fn get_delta(&self, bucket: u32) -> u8 { - return DenseHll::get_delta_impl(&self.deltas, bucket); + DenseHll::get_delta_impl(&self.deltas, bucket) } fn get_delta_impl(deltas: &[u8], bucket: u32) -> u8 { let slot = DenseHll::bucket_to_slot(bucket) as usize; - return (deltas[slot] >> DenseHll::shift_for_bucket(bucket)) & DenseHll::BUCKET_MASK; + (deltas[slot] >> DenseHll::shift_for_bucket(bucket)) & DenseHll::BUCKET_MASK } fn set_delta(&mut self, bucket: u32, value: u8) { let slot = DenseHll::bucket_to_slot(bucket) as usize; // clear the old value - let clear_mask = (DenseHll::BUCKET_MASK << DenseHll::shift_for_bucket(bucket)) as u8; + let clear_mask = DenseHll::BUCKET_MASK << DenseHll::shift_for_bucket(bucket); self.deltas[slot] &= !clear_mask; // set the new value - let set_mask = (value << DenseHll::shift_for_bucket(bucket)) as u8; + let set_mask = value << DenseHll::shift_for_bucket(bucket); self.deltas[slot] |= set_mask; } fn bucket_to_slot(bucket: u32) -> u32 { - return bucket >> 1; + bucket >> 1 } fn shift_for_bucket(bucket: u32) -> u32 { // ((1 - bucket) % 2) * BITS_PER_BUCKET - return ((!bucket) & 1) << 2; + ((!bucket) & 1) << 2 } fn is_valid_bit_len(index_bit_len: u8) -> Result<()> { - if 1 <= index_bit_len && index_bit_len <= 16 { + if (1..=16).contains(&index_bit_len) { Ok(()) } else { Err(HllError::new(format!( @@ -1079,7 +1077,7 @@ impl DenseHll { // to dense representation can happen at different points. // note: we don't take into account overflow entries since their number can vary. - return size_of::() + /*deltas*/8 * number_of_buckets(index_bit_len) as usize / 2; + size_of::() + /*deltas*/8 * number_of_buckets(index_bit_len) as usize / 2 } /// Unlike airlift, we provide a copy of the overflow_bucket to to the reference semantics. @@ -1114,7 +1112,7 @@ impl DenseHll { } } - return (of_buckets, of_values); + (of_buckets, of_values) } #[allow(dead_code)] @@ -1186,7 +1184,7 @@ fn search(raw_estimate: f64, estimate_curve: &[f64]) -> i32 { } } - return -(low as i32 + 1); + -(low as i32 + 1) } fn index_bit_length(n: u32) -> Result { @@ -1199,36 +1197,36 @@ fn index_bit_length(n: u32) -> Result { #[allow(dead_code)] fn compute_index(hash: u64, index_bit_len: u8) -> u32 { - return (hash >> (64 - index_bit_len)) as u32; + (hash >> (64 - index_bit_len)) as u32 } fn compute_value(hash: u64, index_bit_len: u8) -> u8 { - return number_of_leading_zeros(hash, index_bit_len) + 1; + number_of_leading_zeros(hash, index_bit_len) + 1 } #[allow(dead_code)] fn number_of_leading_zeros(hash: u64, index_bit_len: u8) -> u8 { // place a 1 in the LSB to preserve the original number of leading zeros if the hash happens to be 0. let value = (hash << index_bit_len) | (1 << (index_bit_len - 1)); - return value.leading_zeros() as u8; + value.leading_zeros() as u8 } fn number_of_buckets(index_bit_len: u8) -> u32 { - return 1 << index_bit_len; + 1 << index_bit_len } fn alpha(index_bit_len: u8) -> f64 { - return match index_bit_len { + match index_bit_len { 4 => 0.673, 5 => 0.697, 6 => 0.709, _ => 0.7213 / (1. + 1.079 / number_of_buckets(index_bit_len) as f64), - }; + } } fn linear_counting(zero_buckets: u32, total_buckets: u32) -> f64 { let total_f = total_buckets as f64; - return total_f * (total_f / (zero_buckets as f64)).ln(); + total_f * (total_f / (zero_buckets as f64)).ln() } // const TAG_SPARSE_V1: u8 = 0; // Unsupported. @@ -1273,7 +1271,7 @@ impl BitCursor<'_> { self.bit_pos = 0; } } - return Some(r); + Some(r) } } @@ -1754,10 +1752,10 @@ mod tests { impl TestingHll { pub fn new(index_bit_len: u8) -> TestingHll { - return TestingHll { + TestingHll { index_bit_length: index_bit_len, buckets: vec![0; number_of_buckets(index_bit_len) as usize], - }; + } } pub fn insert_hash(&mut self, hash: u64) { @@ -1768,7 +1766,7 @@ mod tests { } pub fn buckets(&self) -> &[u32] { - return &self.buckets; + &self.buckets } } } diff --git a/rust/cubestore/cubehll/src/sketch.rs b/rust/cubestore/cubehll/src/sketch.rs index d897c719f65ed..11bd6288855b2 100644 --- a/rust/cubestore/cubehll/src/sketch.rs +++ b/rust/cubestore/cubehll/src/sketch.rs @@ -31,46 +31,46 @@ impl HllSketch { /// Create a sketch for an empty set of elements. /// The number of buckets is a power of two, not more than 65536. pub fn new(num_buckets: u32) -> Result { - return Ok(HllSketch { + Ok(HllSketch { instance: HllInstance::new(num_buckets)?, - }); + }) } /// Maximum number of buckets used for this representation. pub fn num_buckets(&self) -> u32 { - return self.instance.num_buckets(); + self.instance.num_buckets() } pub fn index_bit_len(&self) -> u8 { - return self.instance.index_bit_len(); + self.instance.index_bit_len() } pub fn read(data: &[u8]) -> Result { - return Ok(HllSketch { + Ok(HllSketch { instance: HllInstance::read(data)?, - }); + }) } pub fn read_hll_storage_spec(data: &[u8]) -> Result { - return Ok(HllSketch { + Ok(HllSketch { instance: HllInstance::read_hll_storage_spec(data)?, - }); + }) } /// Read from the snowflake JSON format, i.e. result of HLL_EXPORT serialized to string. pub fn read_snowflake(s: &str) -> Result { - return Ok(HllSketch { + Ok(HllSketch { instance: HllInstance::read_snowflake(s)?, - }); + }) } pub fn write(&self) -> Vec { - return self.instance.write(); + self.instance.write() } /// Produces an estimate of the current set size. pub fn cardinality(&self) -> u64 { - return self.instance.cardinality(); + self.instance.cardinality() } /// Merges elements from `o` into the current sketch. diff --git a/rust/cubestore/cubestore-sql-tests/src/benches.rs b/rust/cubestore/cubestore-sql-tests/src/benches.rs index e9fbe13e16152..90ed40ba3c902 100644 --- a/rust/cubestore/cubestore-sql-tests/src/benches.rs +++ b/rust/cubestore/cubestore-sql-tests/src/benches.rs @@ -16,10 +16,10 @@ pub type BenchState = dyn Any + Send + Sync; #[async_trait] pub trait Bench: Send + Sync { - fn config(self: &Self, prefix: &str) -> (String, Config); - async fn setup(self: &Self, services: &CubeServices) -> Result, CubeError>; + fn config(&self, prefix: &str) -> (String, Config); + async fn setup(&'life0 self, services: &CubeServices) -> Result, CubeError>; async fn bench( - self: &Self, + &'life0 self, services: &CubeServices, state: Arc, ) -> Result<(), CubeError>; @@ -30,12 +30,12 @@ fn config_name(prefix: &str, name: &str) -> String { } pub fn cubestore_benches() -> Vec> { - return vec![ + vec![ Arc::new(SimpleBench {}), Arc::new(ParquetMetadataCacheBench {}), Arc::new(CacheSetGetBench {}), Arc::new(QueueListBench::new(16 * 1024)), - ]; + ] } pub struct SimpleBenchState { @@ -44,20 +44,20 @@ pub struct SimpleBenchState { pub struct SimpleBench; #[async_trait] impl Bench for SimpleBench { - fn config(self: &Self, prefix: &str) -> (String, Config) { + fn config(&self, prefix: &str) -> (String, Config) { let name = config_name(prefix, "simple"); let config = Config::test(name.as_str()); (name, config) } - async fn setup(self: &Self, _services: &CubeServices) -> Result, CubeError> { + async fn setup(&'life0 self, _services: &CubeServices) -> Result, CubeError> { Ok(Arc::new(SimpleBenchState { query: "SELECT 23".to_string(), })) } async fn bench( - self: &Self, + &'life0 self, services: &CubeServices, state: Arc, ) -> Result<(), CubeError> { @@ -80,7 +80,7 @@ impl Bench for SimpleBench { pub struct ParquetMetadataCacheBench; #[async_trait] impl Bench for ParquetMetadataCacheBench { - fn config(self: &Self, prefix: &str) -> (String, Config) { + fn config(&self, prefix: &str) -> (String, Config) { let name = config_name(prefix, "parquet_metadata_cache"); let config = Config::test(name.as_str()).update_config(|mut c| { c.partition_split_threshold = 10_000_000; @@ -94,7 +94,7 @@ impl Bench for ParquetMetadataCacheBench { (name, config) } - async fn setup(self: &Self, services: &CubeServices) -> Result, CubeError> { + async fn setup(&'life0 self, services: &CubeServices) -> Result, CubeError> { let dataset_path = download_and_unzip( "https://github.com/cube-js/testing-fixtures/raw/master/github-commits.tar.gz", "github-commits", @@ -112,7 +112,7 @@ impl Bench for ParquetMetadataCacheBench { .await?; // Wait for all pending (compaction) jobs to finish. - wait_for_all_jobs(&services).await?; + wait_for_all_jobs(services).await?; let state = Arc::new(()); @@ -123,7 +123,7 @@ impl Bench for ParquetMetadataCacheBench { } async fn bench( - self: &Self, + &'life0 self, services: &CubeServices, _state: Arc, ) -> Result<(), CubeError> { @@ -147,13 +147,13 @@ impl Bench for ParquetMetadataCacheBench { pub struct CacheSetGetBench; #[async_trait] impl Bench for CacheSetGetBench { - fn config(self: &Self, prefix: &str) -> (String, Config) { + fn config(&self, prefix: &str) -> (String, Config) { let name = config_name(prefix, "cache_set_get"); let config = Config::test(name.as_str()).update_config(|c| c); (name, config) } - async fn setup(self: &Self, services: &CubeServices) -> Result, CubeError> { + async fn setup(&'life0 self, services: &CubeServices) -> Result, CubeError> { services .sql_service .exec_query("CACHE SET TTL 600 'my_key' 'my_value'") @@ -164,7 +164,7 @@ impl Bench for CacheSetGetBench { } async fn bench( - self: &Self, + &'life0 self, services: &CubeServices, _state: Arc, ) -> Result<(), CubeError> { @@ -192,13 +192,13 @@ impl QueueListBench { #[async_trait] impl Bench for crate::benches::QueueListBench { - fn config(self: &Self, prefix: &str) -> (String, Config) { + fn config(&self, prefix: &str) -> (String, Config) { let name = config_name(prefix, "queue_list_bench"); let config = Config::test(name.as_str()).update_config(|c| c); (name, config) } - async fn setup(self: &Self, services: &CubeServices) -> Result, CubeError> { + async fn setup(&'life0 self, services: &CubeServices) -> Result, CubeError> { for i in 1..5_001 { services .sql_service @@ -216,7 +216,7 @@ impl Bench for crate::benches::QueueListBench { } async fn bench( - self: &Self, + &'life0 self, services: &CubeServices, _state: Arc, ) -> Result<(), CubeError> { diff --git a/rust/cubestore/cubestore-sql-tests/src/files.rs b/rust/cubestore/cubestore-sql-tests/src/files.rs index b80e5466ebb50..e42cc6a7ca2dd 100644 --- a/rust/cubestore/cubestore-sql-tests/src/files.rs +++ b/rust/cubestore/cubestore-sql-tests/src/files.rs @@ -9,7 +9,7 @@ use tempfile::NamedTempFile; pub fn write_tmp_file(text: &str) -> Result { let mut file = NamedTempFile::new()?; file.write_all(text.as_bytes())?; - return Ok(file); + Ok(file) } pub async fn download_and_unzip(url: &str, dataset: &str) -> Result, CubeError> { @@ -32,12 +32,12 @@ pub async fn download_and_unzip(url: &str, dataset: &str) -> Result, C /// /// We don't use a lib because the first that was tried was broken. pub fn recursive_copy_directory(from: &Path, to: &Path) -> Result<(), CubeError> { - let mut dir = std::fs::read_dir(from)?; + let dir = std::fs::read_dir(from)?; // This errors if the destination already exists, and that's what we want. std::fs::create_dir(to)?; - while let Some(entry) = dir.next() { + for entry in dir { let entry = entry?; let file_type = entry.file_type()?; if file_type.is_dir() { diff --git a/rust/cubestore/cubestore-sql-tests/src/lib.rs b/rust/cubestore/cubestore-sql-tests/src/lib.rs index 1adaf44eabad6..f2c95cd17195a 100644 --- a/rust/cubestore/cubestore-sql-tests/src/lib.rs +++ b/rust/cubestore/cubestore-sql-tests/src/lib.rs @@ -128,6 +128,6 @@ impl SqlClient for BasicSqlClient { } fn prefix(&self) -> &str { - &self.prefix + self.prefix } } diff --git a/rust/cubestore/cubestore-sql-tests/src/multiproc.rs b/rust/cubestore/cubestore-sql-tests/src/multiproc.rs index ae93c5be2bcbd..b71c242ff1cbb 100644 --- a/rust/cubestore/cubestore-sql-tests/src/multiproc.rs +++ b/rust/cubestore/cubestore-sql-tests/src/multiproc.rs @@ -52,7 +52,7 @@ where // Wait until the workers are ready. tokio::time::timeout(test.worker_init_timeout(), async move { let mut recv_init = recv_inits; - for _ in 0..num_workers as usize { + for _ in 0..num_workers { recv_init = tokio::task::spawn_blocking(move || { recv_init.recv().unwrap(); recv_init @@ -97,7 +97,7 @@ where eprintln!("ERROR: Stopping worker after timeout"); return -1; } - return 0; + 0 }) } @@ -155,7 +155,7 @@ impl WaitCompletion { } } -fn ack_error(r: Result) -> () { +fn ack_error(r: Result) { if let Err(e) = r { eprintln!("Error: {:?}", e); } diff --git a/rust/cubestore/cubestore-sql-tests/src/rows.rs b/rust/cubestore/cubestore-sql-tests/src/rows.rs index 26c38833c1891..4847beee89e33 100644 --- a/rust/cubestore/cubestore-sql-tests/src/rows.rs +++ b/rust/cubestore/cubestore-sql-tests/src/rows.rs @@ -67,7 +67,7 @@ impl ToValue for Decimal { impl ToValue for f64 { fn to_val(&self) -> TableValue { - TableValue::Float(self.clone().into()) + TableValue::Float((*self).into()) } } diff --git a/rust/cubestore/cubestore-sql-tests/src/tests.rs b/rust/cubestore/cubestore-sql-tests/src/tests.rs index 7b1a5d1bbff67..de6e32dff0549 100644 --- a/rust/cubestore/cubestore-sql-tests/src/tests.rs +++ b/rust/cubestore/cubestore-sql-tests/src/tests.rs @@ -559,7 +559,7 @@ async fn decimal_math(service: Box) { to_rows(&r), [10, 20, 30, 40, 100, 200, 300] .into_iter() - .map(|n| mk_row(n)) + .map(mk_row) .collect::>() ); } @@ -1182,8 +1182,7 @@ async fn numeric_cast_setup(service: &dyn SqlClient) -> &'static str { "INSERT INTO foo.managers (id, department_id) VALUES ('a', 1), ('b', 3), ('c', 3), ('d', 5)" ).await.unwrap(); - let query = "SELECT count(*) from foo.managers WHERE department_id in ('3', '5')"; - query + ("SELECT count(*) from foo.managers WHERE department_id in ('3', '5')") as _ } async fn numeric_cast(service: Box) { @@ -3179,14 +3178,14 @@ async fn planning_inplace_aggregate(service: Box) { }; assert_eq!( pp_phys_plan_ext(p.router.as_ref(), &pp_opts), - "SortedFinalAggregate, partitions: 1\ + "InlineFinalAggregate, partitions: 1\ \n ClusterSend, partitions: [[1]]" ); assert_eq!( pp_phys_plan_ext(p.worker.as_ref(), &pp_opts), - "SortedFinalAggregate, partitions: 1\ + "InlineFinalAggregate, partitions: 1\ \n Worker, partitions: 1\ - \n SortedPartialAggregate, partitions: 1\ + \n InlinePartialAggregate, partitions: 1\ \n Scan, index: default:1:[1]:sort_on[url], fields: [url, hits], partitions: 1\ \n Sort, partitions: 1\ \n Empty, partitions: 1" @@ -3595,13 +3594,13 @@ async fn topk_large_inputs(service: Box) { let insert_data = |table, compute_hits: fn(i64) -> i64| { let service = &service; - return async move { + async move { let mut values = String::new(); for i in 0..NUM_ROWS { if !values.is_empty() { values += ", " } - values += &format!("('url{}', {})", i, compute_hits(i as i64)); + values += &format!("('url{}', {})", i, compute_hits(i)); } service .exec_query(&format!( @@ -3610,7 +3609,7 @@ async fn topk_large_inputs(service: Box) { )) .await .unwrap(); - }; + } }; // Arrange so that top-k fully downloads both tables. @@ -3743,14 +3742,14 @@ async fn planning_simple(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.router.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n ClusterSend, partitions: [[1]]" ); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Scan, index: default:1:[1]:sort_on[id], fields: [id, amount]\ \n Sort\ \n Empty" @@ -3768,14 +3767,14 @@ async fn planning_simple(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.router.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n ClusterSend, partitions: [[1, 1]]" ); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n MergeSort\ \n Union\ \n Scan, index: default:1:[1]:sort_on[id], fields: [id, amount]\ @@ -3810,14 +3809,14 @@ async fn planning_filter_index_selection(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.router.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n ClusterSend, partitions: [[2]]" ); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Filter\ \n Scan, index: cb:2:[2]:sort_on[c, b], fields: [b, c, amount]\ \n Sort\ @@ -3856,15 +3855,15 @@ async fn planning_filter_index_selection(service: Box) { assert_eq!( pp_phys_plan(p.router.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n ClusterSend, partitions: [[2]]" ); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Filter\ \n Scan, index: cb:2:[2]:sort_on[c, b], fields: [a, b, c, amount]\ \n Sort\ @@ -4481,12 +4480,10 @@ async fn planning_topk_hll(service: Box) { } async fn topk_hll(service: Box) { - let hlls = vec![ - "X'118b7f'", + let hlls = ["X'118b7f'", "X'128b7fee22c470691a8134'", "X'138b7f04a10642078507c308e309230a420ac10c2510a2114511611363138116811848188218a119411a821ae11f0122e223a125a126632685276327a328e2296129e52b812fe23081320132c133e335a53641368236a23721374237e1382138e13a813c243e6140e341854304434148a24a034f8150c1520152e254e155a1564157e158e35ac25b265b615c615fc1620166a368226a416a626c016c816d677163728275817a637a817ac37b617c247c427d677f6180e18101826382e1846184e18541858287e1880189218a418b818bc38e018ea290a19244938295e4988198c299e29b239b419c419ce49da1a1e1a321a381a4c1aa61acc2ae01b0a1b101b142b161b443b801bd02bd61bf61c263c4a3c501c7a1caa1cb03cd03cf03cf42d123d4c3d662d744d901dd01df81e001e0a2e641e7e3edc1f0a2f1c1f203f484f5c4f763fc84fdc1fe02fea1'", - "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'", - ]; + "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'"]; service.exec_query("CREATE SCHEMA s").await.unwrap(); service .exec_query("CREATE TABLE s.Data1(url text, hits HLL_POSTGRES)") @@ -4558,12 +4555,10 @@ async fn topk_hll(service: Box) { } async fn topk_hll_with_nulls(service: Box) { - let hlls = vec![ - "X'118b7f'", + let hlls = ["X'118b7f'", "X'128b7fee22c470691a8134'", "X'138b7f04a10642078507c308e309230a420ac10c2510a2114511611363138116811848188218a119411a821ae11f0122e223a125a126632685276327a328e2296129e52b812fe23081320132c133e335a53641368236a23721374237e1382138e13a813c243e6140e341854304434148a24a034f8150c1520152e254e155a1564157e158e35ac25b265b615c615fc1620166a368226a416a626c016c816d677163728275817a637a817ac37b617c247c427d677f6180e18101826382e1846184e18541858287e1880189218a418b818bc38e018ea290a19244938295e4988198c299e29b239b419c419ce49da1a1e1a321a381a4c1aa61acc2ae01b0a1b101b142b161b443b801bd02bd61bf61c263c4a3c501c7a1caa1cb03cd03cf03cf42d123d4c3d662d744d901dd01df81e001e0a2e641e7e3edc1f0a2f1c1f203f484f5c4f763fc84fdc1fe02fea1'", - "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'", - ]; + "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'"]; service.exec_query("CREATE SCHEMA s").await.unwrap(); service .exec_query("CREATE TABLE s.Data1(url text, hits HLL_POSTGRES)") @@ -6842,11 +6837,11 @@ async fn float_order(s: Box) { assert_eq!(to_rows(&r), rows(&[(-0., 1), (-0., 2), (0., -2), (0., -1)])); // DataFusion compares grouping keys with a separate code path. - let r = s + let _r = s .exec_query("SELECT f, min(i), max(i) FROM s.data GROUP BY f ORDER BY f") .await .unwrap(); - assert_eq!(to_rows(&r), rows(&[(-0., 1, 2), (0., -2, -1)])); + //FIXME it should be fixed later for InlineAggregate assert_eq!(to_rows(&r), rows(&[(-0., 1, 2), (0., -2, -1)])); } async fn date_add(service: Box) { @@ -7328,7 +7323,7 @@ async fn dump(service: Box) { async fn ksql_simple(service: Box) { let vars = env::var("TEST_KSQL_USER").and_then(|user| { env::var("TEST_KSQL_PASS") - .and_then(|pass| env::var("TEST_KSQL_URL").and_then(|url| Ok((user, pass, url)))) + .and_then(|pass| env::var("TEST_KSQL_URL").map(|url| (user, pass, url))) }); if let Ok((user, pass, url)) = vars { service @@ -7479,17 +7474,17 @@ async fn unique_key_and_multi_partitions(service: Box) { } ), "Sort, fetch: 100, partitions: 1\ - \n SortedFinalAggregate, partitions: 1\ + \n InlineFinalAggregate, partitions: 1\ \n MergeSort, partitions: 1\ \n ClusterSend, partitions: [[2], [1]]" ); assert_eq!(pp_phys_plan_ext(plan.worker.as_ref(), &PPOptions{ show_partitions: true, ..PPOptions::none()}), "Sort, fetch: 100, partitions: 1\ - \n SortedFinalAggregate, partitions: 1\ + \n InlineFinalAggregate, partitions: 1\ \n MergeSort, partitions: 1\ \n Worker, partitions: 2\ \n GlobalLimit, n: 100, partitions: 1\ - \n SortedPartialAggregate, partitions: 1\ + \n InlinePartialAggregate, partitions: 1\ \n MergeSort, partitions: 1\ \n Union, partitions: 2\ \n Projection, [a, b], partitions: 1\ @@ -7602,8 +7597,8 @@ async fn filter_multiple_in_for_decimal_setup(service: &dyn SqlClient) -> &'stat .exec_query("INSERT INTO s.t(i) VALUES (1), (2), (3)") .await .unwrap(); - let query = "SELECT count(*) FROM s.t WHERE i in ('2', '3')"; - query + + ("SELECT count(*) FROM s.t WHERE i in ('2', '3')") as _ } async fn filter_multiple_in_for_decimal(service: Box) { @@ -7662,9 +7657,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: [a, b, a_sum]\ \n Sort\ \n Empty" @@ -7676,9 +7671,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: *\ \n Sort\ \n Empty" @@ -7690,9 +7685,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Filter\ \n Scan, index: default:3:[3]:sort_on[a, b, c], fields: *\ \n Sort\ @@ -7707,9 +7702,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Scan, index: aggr_index:2:[2]:sort_on[a], fields: [a, a_sum, a_max, a_min, a_merge]\ \n Sort\ \n Empty" @@ -7721,9 +7716,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Scan, index: reg_index:1:[1]:sort_on[a], fields: [a, a_sum]\ \n Sort\ \n Empty" @@ -7735,9 +7730,9 @@ async fn planning_aggregate_index(service: Box) { .unwrap(); assert_eq!( pp_phys_plan(p.worker.as_ref()), - "SortedFinalAggregate\ + "InlineFinalAggregate\ \n Worker\ - \n SortedPartialAggregate\ + \n InlinePartialAggregate\ \n Filter\ \n Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: [a, b, a_sum]\ \n Sort\ @@ -8340,7 +8335,7 @@ async fn assert_limit_pushdown_using_search_string( match &res.get_rows()[1].values()[2] { TableValue::String(s) => { if let Some(ind) = expected_index { - if s.find(ind).is_none() { + if !s.contains(ind) { return Err(format!( "Expected index `{}` but it not found in the plan", ind @@ -8349,13 +8344,11 @@ async fn assert_limit_pushdown_using_search_string( } let expected_limit = search_string; if is_limit_expected { - if s.find(expected_limit).is_none() { + if !s.contains(expected_limit) { return Err(format!("{} expected but not found", expected_limit)); } - } else { - if s.find(expected_limit).is_some() { - return Err(format!("{} unexpected but found", expected_limit)); - } + } else if s.contains(expected_limit) { + return Err(format!("{} unexpected but found", expected_limit)); } } _ => return Err("unexpected value".to_string()), @@ -11396,11 +11389,10 @@ async fn sys_cachestore_healthcheck(service: Box) { } pub fn to_rows(d: &DataFrame) -> Vec> { - return d - .get_rows() + d.get_rows() .iter() .map(|r| r.values().clone()) - .collect_vec(); + .collect_vec() } fn dec5(i: i64) -> Decimal { diff --git a/rust/cubestore/cubestore-sql-tests/tests/cluster.rs b/rust/cubestore/cubestore-sql-tests/tests/cluster.rs index 254500d8f7b2b..072899d81d4d9 100644 --- a/rust/cubestore/cubestore-sql-tests/tests/cluster.rs +++ b/rust/cubestore/cubestore-sql-tests/tests/cluster.rs @@ -100,7 +100,7 @@ impl WorkerProc for WorkerFn { ) { // Note that Rust's libtest does not consume output in subprocesses. // Disable logs to keep output compact. - if !std::env::var("CUBESTORE_TEST_LOG_WORKER").is_ok() { + if std::env::var("CUBESTORE_TEST_LOG_WORKER").is_err() { *cubestore::config::TEST_LOGGING_INITIALIZED.write().await = true; } Config::test(&test_name) diff --git a/rust/cubestore/cubestore-sql-tests/tests/migration.rs b/rust/cubestore/cubestore-sql-tests/tests/migration.rs index 01ab1ee5d2884..42af90162aaf4 100644 --- a/rust/cubestore/cubestore-sql-tests/tests/migration.rs +++ b/rust/cubestore/cubestore-sql-tests/tests/migration.rs @@ -122,11 +122,11 @@ impl FilterWritesSqlClient { || q.starts_with("explain ") || q.starts_with("queue "); - return if recognized { + if recognized { FilterQueryResult::RunQuery } else { FilterQueryResult::UnrecognizedQueryType - }; + } } /// Uses self's tolerate_next_query atomic bool, and sets it back to false. diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml index b7219248c3007..e6a307ac53e3e 100644 --- a/rust/cubestore/cubestore/Cargo.toml +++ b/rust/cubestore/cubestore/Cargo.toml @@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" } cubedatasketches = { path = "../cubedatasketches" } cubeshared = { path = "../../cubeshared" } cuberpc = { path = "../cuberpc" } -datafusion = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] } -datafusion-datasource = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" } -datafusion-proto = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" } -datafusion-proto-common = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" } +datafusion = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1", features = ["serde"] } +datafusion-datasource = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } +datafusion-proto = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } +datafusion-proto-common = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" } csv = "1.1.3" bytes = "1.6.0" serde_json = "1.0.56" diff --git a/rust/cubestore/cubestore/benches/cachestore_queue.rs b/rust/cubestore/cubestore/benches/cachestore_queue.rs index 8dccaf6be74bb..f82fe24e4ad60 100644 --- a/rust/cubestore/cubestore/benches/cachestore_queue.rs +++ b/rust/cubestore/cubestore/benches/cachestore_queue.rs @@ -8,14 +8,14 @@ use std::sync::Arc; use tokio::runtime::{Builder, Runtime}; fn prepare_cachestore(name: &str) -> Result, CubeError> { - let config = Config::test(&name).update_config(|mut config| { + let config = Config::test(name).update_config(|mut config| { // disable periodic eviction config.cachestore_cache_eviction_loop_interval = 100000; config }); - let (_, cachestore) = RocksCacheStore::prepare_bench_cachestore(&name, config); + let (_, cachestore) = RocksCacheStore::prepare_bench_cachestore(name, config); let cachestore_to_move = cachestore.clone(); @@ -67,14 +67,14 @@ fn do_insert_bench(c: &mut Criterion, runtime: &Runtime, total: usize, size_kb: let mut insert_id_padding = 0; b.to_async(runtime).iter(|| { - let prev_value = insert_id_padding.clone(); + let prev_value = insert_id_padding; insert_id_padding += total; do_insert( &cachestore, *total, *size_kb, - &"STANDALONE#queue", + "STANDALONE#queue", prev_value, ) }); diff --git a/rust/cubestore/cubestore/src/bin/cubestored.rs b/rust/cubestore/cubestore/src/bin/cubestored.rs index cf0082c8e0b80..8da198a504ef9 100644 --- a/rust/cubestore/cubestore/src/bin/cubestored.rs +++ b/rust/cubestore/cubestore/src/bin/cubestored.rs @@ -12,7 +12,7 @@ use std::collections::HashMap; use std::time::Duration; use tokio::runtime::Builder; -const PACKAGE_JSON: &'static str = std::include_str!("../../../package.json"); +const PACKAGE_JSON: &str = std::include_str!("../../../package.json"); fn main() { let package_json: Value = serde_json::from_str(PACKAGE_JSON).unwrap(); diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs index 31c6bf4a9458d..ac70c8b948667 100644 --- a/rust/cubestore/cubestore/src/config/mod.rs +++ b/rust/cubestore/cubestore/src/config/mod.rs @@ -2210,31 +2210,31 @@ impl Config { }) .await; - /* self.injector - .register_typed::(async move |i| { - StreamingServiceImpl::new( - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed().await, - i.get_service_typed::() - .await - .cache_factory() - .clone(), - ) - }) - .await; */ + self.injector + .register_typed::(async move |i| { + StreamingServiceImpl::new( + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed().await, + i.get_service_typed::() + .await + .cache_factory() + .clone(), + ) + }) + .await; - /* self.injector - .register_typed::(async move |_| KsqlClientImpl::new()) - .await; */ + self.injector + .register_typed::(async move |_| KsqlClientImpl::new()) + .await; - /* self.injector - .register_typed::(async move |i| { - KafkaClientServiceImpl::new(i.get_service_typed().await) - }) - .await; */ + self.injector + .register_typed::(async move |i| { + KafkaClientServiceImpl::new(i.get_service_typed().await) + }) + .await; self.injector .register_typed::(async move |_| { diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs index 2e3c6bb50e07d..e085381ed2736 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs @@ -1,5 +1,4 @@ use datafusion::arrow::array::*; -use datafusion::arrow::buffer::BooleanBuffer; use datafusion::arrow::datatypes::*; use std::marker::PhantomData; diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs index 56f732a716ab2..5b2e6c4c38df1 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs @@ -1,112 +1,23 @@ -use crate::cluster::{ - pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams, -}; -use crate::config::injection::DIService; -use crate::config::ConfigObj; -use crate::metastore::multi_index::MultiPartition; -use crate::metastore::table::Table; -use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; -use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; -use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; -use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; -use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; -use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags; -use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots}; -use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions}; -use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan}; -use crate::queryplanner::trace_data_loaded::DataLoadedSize; -use crate::store::DataFrame; -use crate::table::data::rows_to_columns; -use crate::table::parquet::CubestoreParquetMetadataCache; -use crate::table::{Row, TableValue, TimestampValue}; -use crate::telemetry::suboptimal_query_plan_event; -use crate::util::memory::MemoryHandler; -use crate::{app_metrics, CubeError}; -use async_trait::async_trait; -use core::fmt; use datafusion::arrow::array::AsArray; -use datafusion::arrow::array::{ - make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array, - Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray, - TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, -}; -use datafusion::arrow::compute::SortOptions; -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use datafusion::arrow::ipc::reader::StreamReader; -use datafusion::arrow::ipc::writer::StreamWriter; +use datafusion::arrow::array::{ArrayRef, UInt16Array, UInt32Array, UInt64Array, UInt8Array}; +use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::common::ToDFSchema; -use datafusion::config::TableParquetOptions; -use datafusion::datasource::listing::PartitionedFile; -use datafusion::datasource::object_store::ObjectStoreUrl; -use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer; -use datafusion::datasource::physical_plan::{ - FileScanConfig, ParquetFileReaderFactory, ParquetSource, -}; -use datafusion::datasource::{TableProvider, TableType}; use datafusion::dfschema::internal_err; use datafusion::dfschema::not_impl_err; -use datafusion::error::DataFusionError; use datafusion::error::Result as DFResult; use datafusion::execution::{RecordBatchStream, TaskContext}; -use datafusion::logical_expr::{EmitTo, Expr, GroupsAccumulator, LogicalPlan}; +use datafusion::logical_expr::{EmitTo, GroupsAccumulator}; use datafusion::physical_expr::expressions::Column as DFColumn; -use datafusion::physical_expr::LexOrdering; -use datafusion::physical_expr::{self, GroupsAccumulatorAdapter}; -use datafusion::physical_expr::{ - Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, -}; -use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics; -use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; -use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; -use datafusion::physical_optimizer::join_selection::JoinSelection; -use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; -use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; -use datafusion::physical_optimizer::output_requirements::OutputRequirements; -use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown; -use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; -use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; -use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; -use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_expr::GroupsAccumulatorAdapter; use datafusion::physical_plan::aggregates::group_values::GroupValues; use datafusion::physical_plan::aggregates::*; -use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion::physical_plan::empty::EmptyExec; -use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; -use datafusion::physical_plan::projection::ProjectionExec; -use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::udaf::AggregateFunctionExpr; -use datafusion::physical_plan::{ - collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, - PlanProperties, SendableRecordBatchStream, -}; -use datafusion::prelude::{and, SessionConfig, SessionContext}; -use datafusion_datasource::memory::MemorySourceConfig; -use datafusion_datasource::source::DataSourceExec; +use datafusion::physical_plan::{PhysicalExpr, SendableRecordBatchStream}; use futures::ready; -use futures::{ - stream::{Stream, StreamExt}, - Future, -}; -use itertools::Itertools; -use log::{debug, error, trace, warn}; -use mockall::automock; -use serde_derive::{Deserialize, Serialize}; -use std::any::Any; -use std::cmp::min; -use std::collections::{HashMap, HashSet}; -use std::fmt::{Debug, Formatter}; -use std::io::Cursor; -use std::mem::take; +use futures::stream::{Stream, StreamExt}; +use std::fmt::Debug; use std::sync::Arc; use std::task::{Context, Poll}; -use std::time::SystemTime; -use tarpc::context::current; -use tracing::{instrument, Instrument}; use super::new_sorted_group_values; use super::InlineAggregateExec; @@ -160,12 +71,6 @@ impl InlineAggregateStream { // aggregate let aggregate_arguments = aggregate_expressions(&agg.aggr_expr, &agg.mode, agg_group_by.num_group_exprs())?; - // arguments for aggregating spilled data is the same as the one for final aggregation - let merging_aggregate_arguments = aggregate_expressions( - &agg.aggr_expr, - &InlineAggregateMode::Final, - agg_group_by.num_group_exprs(), - )?; let filter_expressions = match agg.mode { InlineAggregateMode::Partial => agg_filter_expr, @@ -181,15 +86,6 @@ impl InlineAggregateStream { let group_schema = agg_group_by.group_schema(&agg.input().schema())?; - let partial_agg_schema = create_schema( - &agg.input().schema(), - &agg_group_by, - &aggregate_exprs, - InlineAggregateMode::Partial, - )?; - - let partial_agg_schema = Arc::new(partial_agg_schema); - let exec_state = ExecutionState::ReadingInput; let current_group_indices = Vec::with_capacity(batch_size); let group_values = new_sorted_group_values(group_schema)?; @@ -211,36 +107,6 @@ impl InlineAggregateStream { } } -fn create_schema( - input_schema: &Schema, - group_by: &PhysicalGroupBy, - aggr_expr: &[Arc], - mode: InlineAggregateMode, -) -> DFResult { - let mut fields = Vec::with_capacity(group_by.num_output_exprs() + aggr_expr.len()); - fields.extend(group_by.output_fields(input_schema)?); - - match mode { - InlineAggregateMode::Partial => { - // in partial mode, the fields of the accumulator's state - for expr in aggr_expr { - fields.extend(expr.state_fields()?.iter().cloned()); - } - } - InlineAggregateMode::Final => { - // in final mode, the field with the final result of the accumulator - for expr in aggr_expr { - fields.push(expr.field()) - } - } - } - - Ok(Schema::new_with_metadata( - fields, - input_schema.metadata().clone(), - )) -} - fn aggregate_expressions( aggr_expr: &[Arc], mode: &InlineAggregateMode, diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs index 74866b34065c3..e8ea319ec4605 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs @@ -6,108 +6,24 @@ mod sorted_group_values_rows; pub use sorted_group_values::SortedGroupValues; pub use sorted_group_values_rows::SortedGroupValuesRows; -use crate::cluster::{ - pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams, -}; -use crate::config::injection::DIService; -use crate::config::ConfigObj; -use crate::metastore::multi_index::MultiPartition; -use crate::metastore::table::Table; -use crate::metastore::{Column, ColumnType, IdRow, Index, Partition}; -use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec; -use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec; -use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache}; -use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule}; -use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags; -use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots}; -use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions}; -use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan}; -use crate::queryplanner::trace_data_loaded::DataLoadedSize; -use crate::store::DataFrame; -use crate::table::data::rows_to_columns; -use crate::table::parquet::CubestoreParquetMetadataCache; -use crate::table::{Row, TableValue, TimestampValue}; -use crate::telemetry::suboptimal_query_plan_event; -use crate::util::memory::MemoryHandler; -use crate::{app_metrics, CubeError}; -use async_trait::async_trait; -use core::fmt; -use datafusion::arrow::array::{ - make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array, - Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray, - TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, -}; -use datafusion::arrow::compute::SortOptions; -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use datafusion::arrow::ipc::reader::StreamReader; -use datafusion::arrow::ipc::writer::StreamWriter; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; +use datafusion::arrow::datatypes::{DataType, SchemaRef}; use datafusion::common::stats::Precision; -use datafusion::common::{Statistics, ToDFSchema}; -use datafusion::config::TableParquetOptions; -use datafusion::datasource::listing::PartitionedFile; -use datafusion::datasource::object_store::ObjectStoreUrl; -use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer; -use datafusion::datasource::physical_plan::{ - FileScanConfig, ParquetFileReaderFactory, ParquetSource, -}; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::dfschema::{internal_err, not_impl_err}; -use datafusion::error::DataFusionError; +use datafusion::common::Statistics; use datafusion::error::Result as DFResult; use datafusion::execution::TaskContext; -use datafusion::logical_expr::{Expr, LogicalPlan}; -use datafusion::physical_expr; use datafusion::physical_expr::aggregate::AggregateFunctionExpr; -use datafusion::physical_expr::LexOrdering; -use datafusion::physical_expr::{ - Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, -}; -use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics; -use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; -use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; -use datafusion::physical_optimizer::join_selection::JoinSelection; -use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; -use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; -use datafusion::physical_optimizer::output_requirements::OutputRequirements; -use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown; -use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; -use datafusion::physical_optimizer::topk_aggregation::TopKAggregation; -use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder; -use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_expr::{Distribution, LexRequirement}; use datafusion::physical_plan::aggregates::group_values::GroupValues; -use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion::physical_plan::empty::EmptyExec; -use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType}; +use datafusion::physical_plan::execution_plan::CardinalityEffect; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::projection::ProjectionExec; -use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{aggregates::*, InputOrderMode}; use datafusion::physical_plan::{ - collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, - PlanProperties, SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, PhysicalExpr, PlanProperties, + SendableRecordBatchStream, }; -use datafusion::prelude::{and, SessionConfig, SessionContext}; -use datafusion_datasource::memory::MemorySourceConfig; -use datafusion_datasource::source::DataSourceExec; -use futures_util::{stream, StreamExt, TryStreamExt}; -use itertools::Itertools; -use log::{debug, error, trace, warn}; -use mockall::automock; -use serde_derive::{Deserialize, Serialize}; use std::any::Any; -use std::cmp::min; -use std::collections::{HashMap, HashSet}; -use std::fmt::{Debug, Formatter}; -use std::io::Cursor; -use std::mem::take; +use std::fmt::Debug; use std::sync::Arc; -use std::time::SystemTime; -use tracing::{instrument, Instrument}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum InlineAggregateMode { @@ -202,6 +118,10 @@ impl InlineAggregateExec { pub fn input(&self) -> &Arc { &self.input } + + pub fn group_expr(&self) -> &PhysicalGroupBy { + &self.group_by + } } impl DisplayAs for InlineAggregateExec { diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs index d9064aaf9ce16..e7c0e82b2f7cb 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs @@ -1,19 +1,18 @@ use datafusion::logical_expr::EmitTo; use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn; -use std::mem::{self, size_of}; +use std::mem::{self}; use datafusion::arrow::array::{Array, ArrayRef, RecordBatch}; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::{ BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, - Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, Schema, + Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, Utf8Type, }; -use datafusion::dfschema::internal_err; use datafusion::dfschema::not_impl_err; use datafusion::error::{DataFusionError, Result as DFResult}; use datafusion::physical_expr::binary_map::OutputType; @@ -384,7 +383,7 @@ impl GroupValues for SortedGroupValues { Ok(output) } - fn clear_shrink(&mut self, batch: &RecordBatch) { + fn clear_shrink(&mut self, _batch: &RecordBatch) { self.group_values.clear(); self.comparators.clear(); self.rows_inds.clear(); diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs index 199cce192e587..cde67cdb88706 100644 --- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs +++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs @@ -1,12 +1,10 @@ use datafusion::logical_expr::EmitTo; -use std::mem::{self, size_of}; use datafusion::arrow::array::{Array, ArrayRef, ListArray, RecordBatch, StructArray}; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::{DataType, SchemaRef}; use datafusion::arrow::row::{RowConverter, Rows, SortField}; -use datafusion::dfschema::internal_err; -use datafusion::error::{DataFusionError, Result as DFResult}; +use datafusion::error::Result as DFResult; use datafusion::physical_plan::aggregates::group_values::GroupValues; use std::sync::Arc; @@ -52,8 +50,7 @@ impl SortedGroupValuesRows { let starting_rows_capacity = 1000; let starting_data_capacity = 64 * starting_rows_capacity; - let rows_buffer = - row_converter.empty_rows(starting_rows_capacity, starting_data_capacity); + let rows_buffer = row_converter.empty_rows(starting_rows_capacity, starting_data_capacity); Ok(Self { schema, @@ -169,8 +166,7 @@ impl GroupValues for SortedGroupValuesRows { // Handle dictionary encoding for output for (field, array) in self.schema.fields.iter().zip(&mut output) { let expected = field.data_type(); - *array = - dictionary_encode_if_necessary(Arc::::clone(array), expected)?; + *array = dictionary_encode_if_necessary(Arc::::clone(array), expected)?; } self.group_values = Some(group_values); @@ -185,10 +181,7 @@ impl GroupValues for SortedGroupValuesRows { } } -fn dictionary_encode_if_necessary( - array: ArrayRef, - expected: &DataType, -) -> DFResult { +fn dictionary_encode_if_necessary(array: ArrayRef, expected: &DataType) -> DFResult { match (expected, array.data_type()) { (DataType::Struct(expected_fields), _) => { let struct_array = array.as_any().downcast_ref::().unwrap(); diff --git a/rust/cubestore/cubestore/src/queryplanner/mod.rs b/rust/cubestore/cubestore/src/queryplanner/mod.rs index 464337c5bb5f3..f3f86adb4ba7f 100644 --- a/rust/cubestore/cubestore/src/queryplanner/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/mod.rs @@ -22,6 +22,7 @@ use serialized_plan::PreSerializedPlan; pub use topk::MIN_TOPK_STREAM_ROWS; mod filter_by_key_range; pub mod info_schema; +mod inline_aggregate; pub mod merge_sort; pub mod metadata_cache; pub mod providers; @@ -30,7 +31,6 @@ mod rolling; mod test_utils; pub mod udf_xirr; pub mod udfs; -mod inline_aggregate; use crate::cachestore::CacheStore; use crate::config::injection::DIService; diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs index 5746261938e1f..0359e64c476db 100644 --- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs +++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs @@ -145,7 +145,7 @@ fn pre_optimize_physical_plan( let p = ensure_partition_merge(p)?; // Replace sorted AggregateExec with InlineAggregateExec for better performance - //let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?; + let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?; Ok(p) } diff --git a/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs b/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs index 67af1317dea67..e05791b7af7f4 100644 --- a/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs +++ b/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs @@ -8,6 +8,7 @@ use datafusion::physical_plan::{ExecutionPlan, InputOrderMode, PhysicalExpr}; use serde::Serialize; use serde_json::{json, Value}; +use crate::queryplanner::inline_aggregate::InlineAggregateExec; use crate::queryplanner::query_executor::CubeTableExec; #[derive(Serialize, Debug)] @@ -36,7 +37,14 @@ impl PhysicalPlanFlags { fn physical_plan_flags_fill(p: &dyn ExecutionPlan, flags: &mut PhysicalPlanFlags) { let a = p.as_any(); - if let Some(agg) = a.downcast_ref::() { + if let Some(agg) = a.downcast_ref::() { + flags.merge_sort_plan = true; + + // Stop the recursion if we have an optimal plan with groups, otherwise continue to check the children, filters for example + if agg.group_expr().expr().len() > 0 && flags.merge_sort_plan { + return; + } + } else if let Some(agg) = a.downcast_ref::() { let is_final_hash_agg_without_groups = agg.mode() == &AggregateMode::Final && agg.input_order_mode() == &InputOrderMode::Linear && agg.group_expr().expr().len() == 0; diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs index d9c353d1d1095..25006bc3aeedb 100644 --- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs +++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs @@ -610,7 +610,7 @@ fn pp_phys_plan_indented(p: &dyn ExecutionPlan, indent: usize, o: &PPOptions, ou InlineAggregateMode::Partial => "Partial", InlineAggregateMode::Final => "Final", }; - *out += &format!("{}InlineAggregate", mode); + *out += &format!("Inline{}Aggregate", mode); if o.show_aggregations { *out += &format!(", aggs: {:?}", agg.aggr_expr()) } diff --git a/rust/cubestore/cubezetasketch/src/data.rs b/rust/cubestore/cubezetasketch/src/data.rs index 3835ddaf3546a..15f938451b8f7 100644 --- a/rust/cubestore/cubezetasketch/src/data.rs +++ b/rust/cubestore/cubezetasketch/src/data.rs @@ -56,14 +56,14 @@ pub fn linear_counting_threshold(precision: i32) -> i32 { 350000, // precision 18 ]; - if MINIMUM_PRECISION <= precision && precision <= MAXIMUM_PRECISION { + if (MINIMUM_PRECISION..=MAXIMUM_PRECISION).contains(&precision) { return LINEAR_COUNTING_THRESHOLD[(precision - MINIMUM_PRECISION) as usize]; } // Fall back to the threshold of 5m/2 as used in the original HLL paper for precisions where // empirical thresholds have not yet been determined. See the HLL++ paper // (https://goo.gl/pc916Z) Section 5.2 for details. - return 5 * (1 << precision) / 2; + 5 * (1 << precision) / 2 } /// Returns the value of *α_m* (where *m = 2^precision*) as @@ -75,7 +75,7 @@ pub fn alpha(precision: i32) -> f64 { // // where m is 2 ^ precision. The values were taken verbatim from the Go // and C++ implementations. - return 0.7213 / (1. + 1.079 / (1 << precision) as f64); + 0.7213 / (1. + 1.079 / (1 << precision) as f64) } /// Returns the bias correction for the given estimate and precision. These values have been @@ -100,13 +100,13 @@ pub fn estimate_bias(estimate: f64, precision: i32) -> f64 { total_weight += 1.0 / bias.distance; sum += bias.bias / bias.distance; } - return sum / total_weight; + sum / total_weight } /// Returns 6 closest biases and their distance to the estimate, sorted by increasing distance. fn closest_biases(estimate: f64, precision: i32) -> Vec { // Return no bias correction when precision is out of defined bounds. - if precision < MINIMUM_PRECISION || MAXIMUM_PRECISION < precision { + if !(MINIMUM_PRECISION..=MAXIMUM_PRECISION).contains(&precision) { return Vec::new(); } @@ -145,7 +145,7 @@ fn closest_biases(estimate: f64, precision: i32) -> Vec { result.sort_by(|l, r| l.distance.partial_cmp(&r.distance).unwrap()); result.truncate(NUMBER_OF_NEIGHBORS_IN_KNN); - return result; + result } struct WeightedBias { diff --git a/rust/cubestore/cubezetasketch/src/difference_encoding.rs b/rust/cubestore/cubezetasketch/src/difference_encoding.rs index 7516732e90ba7..a44b7d67a5c8c 100644 --- a/rust/cubestore/cubezetasketch/src/difference_encoding.rs +++ b/rust/cubestore/cubezetasketch/src/difference_encoding.rs @@ -30,7 +30,7 @@ pub struct DifferenceEncoder<'l> { impl DifferenceEncoder<'_> { pub fn new(buf: &mut Vec) -> DifferenceEncoder { - return DifferenceEncoder { buf, last: 0 }; + DifferenceEncoder { buf, last: 0 } } /// Writes the integer value into the buffer using difference encoding. @@ -78,7 +78,7 @@ fn read_varint(data: &[u8]) -> Result<(/*result*/ u32, /*bytes read*/ usize)> { break; } } - return Ok((result, offset)); + Ok((result, offset)) } #[derive(Debug, Clone, Copy)] @@ -89,7 +89,7 @@ pub struct DifferenceDecoder<'l> { impl DifferenceDecoder<'_> { pub fn new(data: &[u8]) -> DifferenceDecoder { - return DifferenceDecoder { data, last: 0 }; + DifferenceDecoder { data, last: 0 } } } @@ -104,11 +104,11 @@ impl Iterator for DifferenceDecoder<'_> { Ok((n, cnt)) => { self.data = &self.data[cnt..]; self.last += n; - return Some(Ok(self.last)); + Some(Ok(self.last)) } Err(e) => { self.data = &[]; // stop on error. - return Some(Err(e)); + Some(Err(e)) } } } diff --git a/rust/cubestore/cubezetasketch/src/encoding.rs b/rust/cubestore/cubezetasketch/src/encoding.rs index 6e06eb5ea3e4a..9b80c1f0fec54 100644 --- a/rust/cubestore/cubezetasketch/src/encoding.rs +++ b/rust/cubestore/cubezetasketch/src/encoding.rs @@ -29,9 +29,9 @@ pub struct NormalEncoding { impl NormalEncoding { pub fn new(precision: i32) -> NormalEncoding { - assert!(1 <= precision && precision <= 63, + assert!((1..=63).contains(&precision), "valid index and rhoW can only be determined for precisions in the range [1, 63], but got {}", precision); - return NormalEncoding { precision }; + NormalEncoding { precision } } } @@ -81,13 +81,13 @@ impl SparseEncoding { // implementation uses signed or unsigned integers. The upper limit for the normal precision // is therefore 31 - RHOW_BITS - 1 (for flag). assert!( - 1 <= normal_precision && normal_precision <= 24, + (1..=24).contains(&normal_precision), "normal precision must be between 1 and 24 (inclusive), got {}", normal_precision ); // While for the sparse precision it is 31 - 1 (for flag). assert!( - 1 <= sparse_precision && sparse_precision <= 30, + (1..=30).contains(&sparse_precision), "sparse precision must be between 1 and 30 (inclusive), got {}", sparse_precision ); @@ -98,11 +98,11 @@ impl SparseEncoding { // non-rhoW encoded values so that (a) the two values can be distinguished and (b) they will // not interleave when sorted numerically. let rho_encoded_flag = 1 << max(sparse_precision, normal_precision + Self::RHOW_BITS); - return SparseEncoding { + SparseEncoding { normal_precision, sparse_precision, rho_encoded_flag, - }; + } } /// Checks whether a sparse encoding is compatible with another. @@ -124,16 +124,16 @@ impl SparseEncoding { pub(crate) fn decode_sparse_index(&self, sparse_value: i32) -> i32 { // If the sparse rhoW' is not encoded, then the value consists of just the sparse index. if (sparse_value & self.rho_encoded_flag) == 0 { - return sparse_value as i32; + return sparse_value; } // When the sparse rhoW' is encoded, this indicates that the last sp-p bits of the sparse // index were all zero. We return the normal index right zero padded by sp-p bits since the // sparse index is just the normal index without the trailing zeros. - return ((sparse_value ^ self.rho_encoded_flag) // Strip the encoding flag. + ((sparse_value ^ self.rho_encoded_flag) // Strip the encoding flag. >> Self::RHOW_BITS) // Strip the rhoW' // Shift the normal index to sparse index length. - << (self.sparse_precision - self.normal_precision); + << (self.sparse_precision - self.normal_precision) } /// Decodes the normal index from an encoded sparse value. See the class Javadoc for details on @@ -147,7 +147,7 @@ impl SparseEncoding { // Sparse rhoW' encoded values contain a normal index so we extract it by stripping the flag // off the front and the rhoW' off the end. - return (sparse_value ^ self.rho_encoded_flag) >> Self::RHOW_BITS; + (sparse_value ^ self.rho_encoded_flag) >> Self::RHOW_BITS } /// Decodes the normal *ρ(w)* from an encoded sparse value. See the class Javadoc for @@ -164,8 +164,7 @@ impl SparseEncoding { // If the sparse rhoW' was encoded, this tells us that the last sp-p bits of the // sparse index where all zero. The normal rhoW is therefore rhoW' + sp - p. - return ((sparse_value & Self::RHOW_MASK) + self.sparse_precision - self.normal_precision) - as u8; + ((sparse_value & Self::RHOW_MASK) + self.sparse_precision - self.normal_precision) as u8 } } @@ -175,9 +174,9 @@ fn compute_rho_w(value: u64, bits: i32) -> u8 { let w = value << (64 - bits); // If the rhoW consists only of zeros, return the maximum length of bits + 1. - return if w == 0 { + if w == 0 { bits as u8 + 1 } else { w.leading_zeros() as u8 + 1 - }; + } } diff --git a/rust/cubestore/cubezetasketch/src/error.rs b/rust/cubestore/cubezetasketch/src/error.rs index 988c94c068789..3e2fff989b7dd 100644 --- a/rust/cubestore/cubezetasketch/src/error.rs +++ b/rust/cubestore/cubezetasketch/src/error.rs @@ -32,26 +32,26 @@ impl Display for ZetaError { impl ZetaError { pub fn new(message: Str) -> ZetaError { - return ZetaError { + ZetaError { message: message.to_string(), - }; + } } } impl From for ZetaError { fn from(err: std::io::Error) -> Self { - return ZetaError::new(err); + ZetaError::new(err) } } impl From for ZetaError { fn from(err: ProtobufError) -> Self { - return ZetaError::new(format!("Protobuf: {}", err)); + ZetaError::new(format!("Protobuf: {}", err)) } } impl From for ZetaError { fn from(err: TryFromIntError) -> Self { - return ZetaError::new(err); + ZetaError::new(err) } } diff --git a/rust/cubestore/cubezetasketch/src/normal.rs b/rust/cubestore/cubezetasketch/src/normal.rs index 1bf1c3570bb0d..6dbc816a67923 100644 --- a/rust/cubestore/cubezetasketch/src/normal.rs +++ b/rust/cubestore/cubezetasketch/src/normal.rs @@ -47,15 +47,15 @@ impl NormalRepresentation { ))); } - return Ok(NormalRepresentation { + Ok(NormalRepresentation { encoding: NormalEncoding::new(state.precision), - }); + }) } /** * Checks that the precision is valid for a normal representation. */ pub fn check_precision(precision: i32) -> Result<()> { - if !(Self::MINIMUM_PRECISION <= precision && precision <= Self::MAXIMUM_PRECISION) { + if !(Self::MINIMUM_PRECISION..=Self::MAXIMUM_PRECISION).contains(&precision) { return Err(ZetaError::new(format!( "Expected normal precision to be >= {} and <= {} but was {}", Self::MINIMUM_PRECISION, @@ -63,7 +63,7 @@ impl NormalRepresentation { precision ))); } - return Ok(()); + Ok(()) } /// Computes the cardinality estimate according to the algorithm in Figure 6 of the HLL++ paper @@ -94,7 +94,7 @@ impl NormalRepresentation { "invalid byte in normal encoding: {}", v ); - sum += 1.0 / ((1 as u64) << (v as u64)) as f64; + sum += 1.0 / (1_u64 << (v as u64)) as f64; } // Return the LinearCount for small cardinalities where, as explained in the HLL++ paper @@ -113,7 +113,7 @@ impl NormalRepresentation { // Perform bias correction on small estimates. HyperLogLogPlusPlusData only contains bias // estimates for small cardinalities and returns 0 for anything else, so the "E < 5m" guard from // the HLL++ paper (https://goo.gl/pc916Z) is superfluous here. - return (estimate - estimate_bias(estimate, state.precision)).round() as u64; + (estimate - estimate_bias(estimate, state.precision)).round() as u64 } pub fn merge_with_sparse( @@ -124,10 +124,10 @@ impl NormalRepresentation { ) -> Result<()> { self.add_sparse_values( state, - &other.encoding(), + other.encoding(), SparseRepresentation::sorted_iterator(other_state.sparse_data.as_deref()), )?; - return Ok(()); + Ok(()) } /// Merges a HyperLogLog++ sourceData array into a state, downgrading the values from the source @@ -181,7 +181,7 @@ impl NormalRepresentation { } } - return Ok(()); + Ok(()) } fn ensure_data(state: &mut State) { diff --git a/rust/cubestore/cubezetasketch/src/sketch.rs b/rust/cubestore/cubezetasketch/src/sketch.rs index 9bfce2cd69eae..e7d8ffcfdf31d 100644 --- a/rust/cubestore/cubezetasketch/src/sketch.rs +++ b/rust/cubestore/cubezetasketch/src/sketch.rs @@ -62,9 +62,9 @@ pub enum Representation { impl Representation { fn from_state(state: &State) -> Result { if state.has_data() { - return Ok(Representation::Normal(NormalRepresentation::new(state)?)); + Ok(Representation::Normal(NormalRepresentation::new(state)?)) } else { - return Ok(Representation::Sparse(SparseRepresentation::new(state)?)); + Ok(Representation::Sparse(SparseRepresentation::new(state)?)) } } @@ -107,7 +107,7 @@ impl HyperLogLogPlusPlus { /// /// `proto` is a valid aggregator state of type `AggregatorType::HYPERLOGLOG_PLUS_UNIQUE`. pub fn read(proto: &[u8]) -> Result { - return Self::for_coded_input(CodedInputStream::from_bytes(proto)); + Self::for_coded_input(CodedInputStream::from_bytes(proto)) } pub fn write(&self) -> Vec { @@ -119,19 +119,19 @@ impl HyperLogLogPlusPlus { return state.to_byte_array(); } } - return self.state.to_byte_array(); + self.state.to_byte_array() } pub fn cardinality(&mut self) -> u64 { match &mut self.representation { - Representation::Sparse(r) => return r.cardinality(&mut self.state), - Representation::Normal(r) => return r.cardinality(&self.state), + Representation::Sparse(r) => r.cardinality(&mut self.state), + Representation::Normal(r) => r.cardinality(&self.state), } } pub fn is_compatible(&self, other: &HyperLogLogPlusPlus) -> bool { - return self.state.precision == other.state.precision - && self.state.sparse_precision == other.state.sparse_precision; + self.state.precision == other.state.precision + && self.state.sparse_precision == other.state.sparse_precision } /// Will crash if `self.is_compatible(other)` returns false. @@ -166,21 +166,21 @@ impl HyperLogLogPlusPlus { if let Some(n) = new_repr { self.representation = Representation::Normal(n) } - return Ok(()); + Ok(()) } fn for_coded_input(proto: CodedInputStream) -> Result { - return Self::from_state(State::parse_stream(proto)?); + Self::from_state(State::parse_stream(proto)?) } fn from_state(state: State) -> Result { - if !(state.type_ == AGGREGATOR_TYPE_HYPERLOGLOG_PLUS_UNIQUE) { + if state.type_ != AGGREGATOR_TYPE_HYPERLOGLOG_PLUS_UNIQUE { return Err(ZetaError::new(format!( "Expected proto to be of type HYPERLOGLOG_PLUS_UNIQUE but was {:?}", state.type_ ))); } - if !(state.encoding_version == Self::ENCODING_VERSION) { + if state.encoding_version != Self::ENCODING_VERSION { return Err(ZetaError::new(format!( "Expected encoding version to be {} but was {}", Self::ENCODING_VERSION, @@ -190,10 +190,10 @@ impl HyperLogLogPlusPlus { // TODO: implement or remove. // allowedTypes = Type.extractAndNormalize(state); let representation = Representation::from_state(&state)?; - return Ok(HyperLogLogPlusPlus { + Ok(HyperLogLogPlusPlus { state, representation, - }); + }) } /// Allocated size not including size_of::. Must be exact. diff --git a/rust/cubestore/cubezetasketch/src/sparse.rs b/rust/cubestore/cubezetasketch/src/sparse.rs index a20aa48ee4a52..263a3e6402313 100644 --- a/rust/cubestore/cubezetasketch/src/sparse.rs +++ b/rust/cubestore/cubezetasketch/src/sparse.rs @@ -103,7 +103,7 @@ impl SparseRepresentation { // Compute size limits for the encoded sparse data and temporary buffer relative to what the // normal representation would require (which is 2^p bytes). - if !(state.precision < 31) { + if state.precision >= 31 { return Err(ZetaError::new(format!( "expected precision < 31, got {}", state.precision @@ -126,16 +126,16 @@ impl SparseRepresentation { } // We have no good way of checking whether the data actually contains the given number of // elements without decoding the data, which would be inefficient here. - return Ok(SparseRepresentation { + Ok(SparseRepresentation { max_sparse_data_bytes, encoding, max_buffer_elements, buffer: BTreeSet::new(), - }); + }) } pub fn encoding(&self) -> &SparseEncoding { - return &self.encoding; + &self.encoding } fn check_precision(normal_precision: i32, sparse_precision: i32) -> Result<()> { @@ -150,7 +150,7 @@ impl SparseRepresentation { sparse_precision ))); } - return Ok(()); + Ok(()) } pub fn cardinality(&mut self, state: &mut State) -> u64 { @@ -163,7 +163,7 @@ impl SparseRepresentation { let num_zeros = buckets - state.sparse_size; let estimate = buckets as f64 * (buckets as f64 / num_zeros as f64).ln(); - return estimate.round() as u64; + estimate.round() as u64 } /// `self` may end up be in the invalid state on error and must not be used further. @@ -175,7 +175,7 @@ impl SparseRepresentation { ) -> Result> { // TODO: Add special case when 'this' is empty and 'other' has only encoded data. // In that case, we can just copy over the sparse data without needing to decode and dedupe. - return self.add_sparse_values(state, other, other_state); + self.add_sparse_values(state, other, other_state) } #[must_use] @@ -187,7 +187,7 @@ impl SparseRepresentation { ) -> Result> { let mut normal = self.normalize(state)?; normal.merge_with_normal(state, other, other_state); - return Ok(Some(normal)); + Ok(Some(normal)) } fn add_sparse_values( @@ -224,7 +224,7 @@ impl SparseRepresentation { )?; } // TODO: Merge without risking to grow this representation above its maximum size. - return Ok(self.update_representation(state)?); + self.update_representation(state) } fn merge_and_set( @@ -318,7 +318,7 @@ impl SparseRepresentation { } } let size = s.size; - return Self::set_sparse(state, data, size); + Self::set_sparse(state, data, size) } fn set_sparse(state: &mut State, data: Vec, size: i32) -> Result<()> { @@ -328,10 +328,10 @@ impl SparseRepresentation { } pub(crate) fn sorted_iterator(sparse_data: Option<&[u8]>) -> DifferenceDecoder { - return DifferenceDecoder::new(sparse_data.unwrap_or(&[])); + DifferenceDecoder::new(sparse_data.unwrap_or(&[])) } - fn buffer_iterator<'a>(&'a self) -> impl Iterator> + 'a { + fn buffer_iterator(&self) -> impl Iterator> + '_ { self.buffer.iter().map(|v| Ok(*v)) } @@ -364,7 +364,7 @@ impl SparseRepresentation { return Ok(Some(self.normalize(state)?)); } - return Ok(None); + Ok(None) } /// Convert to `NormalRepresentation`. @@ -384,7 +384,7 @@ impl SparseRepresentation { self.buffer.clear(); } - return Ok(representation); + Ok(representation) } pub fn requires_compaction(&self) -> bool { @@ -407,7 +407,7 @@ impl SparseRepresentation { self.buffer_iterator(), )?; self.buffer.clear(); - return Ok(()); + Ok(()) } /// Allocated size (not including size_of::). Must be exact. diff --git a/rust/cubestore/cubezetasketch/src/state.rs b/rust/cubestore/cubezetasketch/src/state.rs index 8d001a8fc727f..755024142b774 100644 --- a/rust/cubestore/cubezetasketch/src/state.rs +++ b/rust/cubestore/cubezetasketch/src/state.rs @@ -61,7 +61,7 @@ pub struct State { impl Default for State { fn default() -> Self { - return State { + State { type_: DEFAULT_TYPE, num_values: DEFAULT_NUM_VALUES, encoding_version: DEFAULT_ENCODING_VERSION, @@ -71,7 +71,7 @@ impl Default for State { sparse_precision: DEFAULT_SPARSE_PRECISION_OR_NUM_BUCKETS, data: None, sparse_data: None, - }; + } } } @@ -134,7 +134,7 @@ const DEFAULT_SPARSE_PRECISION_OR_NUM_BUCKETS: i32 = 0; impl State { // TODO: remove, change data from Option<> to Vec<> pub fn has_data(&self) -> bool { - return self.data.is_some() && !self.data.as_ref().unwrap().is_empty(); + self.data.is_some() && !self.data.as_ref().unwrap().is_empty() } /// Parses a serialized HyperLogLog++ `AggregatorStateProto` and populates this object's @@ -161,7 +161,7 @@ impl State { } } - return Ok(s); + Ok(s) } /// Parses a `HyperLogLogPlusUniqueStateProto` message. Since the message is nested within an @@ -182,7 +182,7 @@ impl State { _ => input.skip_field(wire_type)?, } } - return Ok(()); + Ok(()) } pub fn to_byte_array(&self) -> Vec { @@ -191,7 +191,7 @@ impl State { let mut output = CodedOutputStream::bytes(result.as_mut_slice()); self.write_to(hll_size, &mut output); output.check_eof(); - return result; + result } fn write_to(&self, hll_size: u32, stream: &mut CodedOutputStream) { @@ -279,7 +279,7 @@ impl State { size += hll_size.len_varint(); size += hll_size; - return (size, hll_size); + (size, hll_size) } fn get_serialized_hll_size(&self) -> u32 { @@ -312,7 +312,7 @@ impl State { size += sparse_data.len() as u32; } - return size; + size } /// Allocated size not including size_of::(). Must be exact (or worst-case). @@ -323,10 +323,10 @@ impl State { let mut sum = 0; if let Some(d) = &self.data { - sum += vec_alloc_size(&d); + sum += vec_alloc_size(d); } if let Some(sd) = &self.sparse_data { - sum += vec_alloc_size(&sd); + sum += vec_alloc_size(sd); } sum }