From b6a0ff201b9394e18f33cac7718cb118698f8e6a Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Thu, 16 Oct 2025 19:36:04 +0200
Subject: [PATCH 1/9] in work

---
 rust/cubestore/Cargo.lock                     |  41 --
 rust/cubestore/cubestore/Cargo.toml           |  12 +-
 rust/cubestore/cubestore/src/config/mod.rs    |  52 +-
 rust/cubestore/cubestore/src/import/mod.rs    |  24 +-
 rust/cubestore/cubestore/src/lib.rs           |   2 +-
 .../inline_aggregate_stream.rs                | 611 ++++++++++++++++++
 .../src/queryplanner/inline_aggregate/mod.rs  | 281 ++++++++
 .../inline_aggregate/sorted_group_values.rs   | 304 +++++++++
 .../cubestore/src/queryplanner/mod.rs         |   3 +-
 .../src/queryplanner/pretty_printers.rs       |   4 +-
 .../src/queryplanner/query_executor.rs        |   3 +-
 rust/cubestore/rust-toolchain.toml            |   2 +-
 12 files changed, 1247 insertions(+), 92 deletions(-)
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs

diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock
index 7cd0e2c9ddcec..6a64ae7e5efe8 100644
--- a/rust/cubestore/Cargo.lock
+++ b/rust/cubestore/Cargo.lock
@@ -219,7 +219,6 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 [[package]]
 name = "arrow"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -239,7 +238,6 @@ dependencies = [
 [[package]]
 name = "arrow-arith"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -252,7 +250,6 @@ dependencies = [
 [[package]]
 name = "arrow-array"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "ahash 0.8.11",
  "arrow-buffer",
@@ -268,7 +265,6 @@ dependencies = [
 [[package]]
 name = "arrow-buffer"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "bytes 1.10.1",
  "half 2.4.1",
@@ -278,7 +274,6 @@ dependencies = [
 [[package]]
 name = "arrow-cast"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -298,7 +293,6 @@ dependencies = [
 [[package]]
 name = "arrow-csv"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -313,7 +307,6 @@ dependencies = [
 [[package]]
 name = "arrow-data"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -324,7 +317,6 @@ dependencies = [
 [[package]]
 name = "arrow-ipc"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -337,7 +329,6 @@ dependencies = [
 [[package]]
 name = "arrow-json"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -356,7 +347,6 @@ dependencies = [
 [[package]]
 name = "arrow-ord"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -368,7 +358,6 @@ dependencies = [
 [[package]]
 name = "arrow-row"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -380,7 +369,6 @@ dependencies = [
 [[package]]
 name = "arrow-schema"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "serde",
 ]
@@ -388,7 +376,6 @@ dependencies = [
 [[package]]
 name = "arrow-select"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "ahash 0.8.11",
  "arrow-array",
@@ -401,7 +388,6 @@ dependencies = [
 [[package]]
 name = "arrow-string"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -1723,7 +1709,6 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308"
 [[package]]
 name = "datafusion"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "arrow-ipc",
@@ -1776,7 +1761,6 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1795,7 +1779,6 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog-listing"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1816,7 +1799,6 @@ dependencies = [
 [[package]]
 name = "datafusion-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1839,7 +1821,6 @@ dependencies = [
 [[package]]
 name = "datafusion-common-runtime"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "log",
  "tokio",
@@ -1848,7 +1829,6 @@ dependencies = [
 [[package]]
 name = "datafusion-datasource"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "async-compression 0.4.17",
@@ -1881,12 +1861,10 @@ dependencies = [
 [[package]]
 name = "datafusion-doc"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 
 [[package]]
 name = "datafusion-execution"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "dashmap",
@@ -1906,7 +1884,6 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "chrono",
@@ -1926,7 +1903,6 @@ dependencies = [
 [[package]]
 name = "datafusion-expr-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1938,7 +1914,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1966,7 +1941,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1986,7 +1960,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1998,7 +1971,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-nested"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "arrow-ord",
@@ -2018,7 +1990,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-table"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "async-trait",
@@ -2033,7 +2004,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "datafusion-common",
  "datafusion-doc",
@@ -2049,7 +2019,6 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -2058,7 +2027,6 @@ dependencies = [
 [[package]]
 name = "datafusion-macros"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -2068,7 +2036,6 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "chrono",
@@ -2086,7 +2053,6 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2107,7 +2073,6 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2120,7 +2085,6 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2138,7 +2102,6 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-plan"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2170,7 +2133,6 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "chrono",
@@ -2185,7 +2147,6 @@ dependencies = [
 [[package]]
 name = "datafusion-proto-common"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2195,7 +2156,6 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "46.0.1"
-source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#7c9a7a61516fdc8094931bcbb9c4d45898ae12f3"
 dependencies = [
  "arrow",
  "bigdecimal 0.4.8",
@@ -4554,7 +4514,6 @@ dependencies = [
 [[package]]
 name = "parquet"
 version = "54.2.1"
-source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "aes-gcm",
  "ahash 0.8.11",
diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml
index e6a307ac53e3e..83834c7a7e827 100644
--- a/rust/cubestore/cubestore/Cargo.toml
+++ b/rust/cubestore/cubestore/Cargo.toml
@@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" }
 cubedatasketches = { path = "../cubedatasketches" }
 cubeshared = { path = "../../cubeshared" }
 cuberpc = { path = "../cuberpc" }
-datafusion = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1", features = ["serde"] }
-datafusion-datasource = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
-datafusion-proto = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
-datafusion-proto-common = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
+datafusion = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] }
+datafusion-datasource = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" }
+datafusion-proto = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" }
+datafusion-proto-common = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" }
 csv = "1.1.3"
 bytes = "1.6.0"
 serde_json = "1.0.56"
@@ -120,8 +120,8 @@ sasl2-sys = { version = "0.1.6", features = ["vendored"] }
 rdkafka = { version = "0.29.0", features = ["cmake-build"] }
 
 [target.'cfg(target_os = "macos")'.dependencies]
-rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] }
-sasl2-sys = { version = "0.1.6", features = ["vendored"] }
+#rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] }
+#sasl2-sys = { version = "0.1.6", features = ["vendored"] }
 
 [dev-dependencies]
 pretty_assertions = "0.7.1"
diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs
index ac70c8b948667..f378a7e376767 100644
--- a/rust/cubestore/cubestore/src/config/mod.rs
+++ b/rust/cubestore/cubestore/src/config/mod.rs
@@ -36,8 +36,8 @@ use crate::sql::{SqlService, SqlServiceImpl};
 use crate::sql::{TableExtensionService, TableExtensionServiceImpl};
 use crate::store::compaction::{CompactionService, CompactionServiceImpl};
 use crate::store::{ChunkDataStore, ChunkStore, WALDataStore, WALStore};
-use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl};
-use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl};
+/* use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl};
+use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; */
 use crate::table::parquet::{
     CubestoreMetadataCacheFactory, CubestoreMetadataCacheFactoryImpl,
     CubestoreParquetMetadataCache, CubestoreParquetMetadataCacheImpl,
@@ -2194,7 +2194,7 @@ impl Config {
             .register_typed::<dyn ImportService, _, _, _>(async move |i| {
                 ImportServiceImpl::new(
                     i.get_service_typed().await,
-                    i.get_service_typed().await,
+                    //i.get_service_typed().await,
                     i.get_service_typed().await,
                     i.get_service_typed().await,
                     i.get_service_typed().await,
@@ -2210,31 +2210,31 @@ impl Config {
             })
             .await;
 
-        self.injector
-            .register_typed::<dyn StreamingService, _, _, _>(async move |i| {
-                StreamingServiceImpl::new(
-                    i.get_service_typed().await,
-                    i.get_service_typed().await,
-                    i.get_service_typed().await,
-                    i.get_service_typed().await,
-                    i.get_service_typed().await,
-                    i.get_service_typed::<dyn CubestoreMetadataCacheFactory>()
-                        .await
-                        .cache_factory()
-                        .clone(),
-                )
-            })
-            .await;
+        /* self.injector
+        .register_typed::<dyn StreamingService, _, _, _>(async move |i| {
+            StreamingServiceImpl::new(
+                i.get_service_typed().await,
+                i.get_service_typed().await,
+                i.get_service_typed().await,
+                i.get_service_typed().await,
+                i.get_service_typed().await,
+                i.get_service_typed::<dyn CubestoreMetadataCacheFactory>()
+                    .await
+                    .cache_factory()
+                    .clone(),
+            )
+        })
+        .await; */
 
-        self.injector
-            .register_typed::<dyn KsqlClient, _, _, _>(async move |_| KsqlClientImpl::new())
-            .await;
+        /* self.injector
+        .register_typed::<dyn KsqlClient, _, _, _>(async move |_| KsqlClientImpl::new())
+        .await; */
 
-        self.injector
-            .register_typed::<dyn KafkaClientService, _, _, _>(async move |i| {
-                KafkaClientServiceImpl::new(i.get_service_typed().await)
-            })
-            .await;
+        /* self.injector
+        .register_typed::<dyn KafkaClientService, _, _, _>(async move |i| {
+            KafkaClientServiceImpl::new(i.get_service_typed().await)
+        })
+        .await; */
 
         self.injector
             .register_typed::<dyn ProcessRateLimiter, _, _, _>(async move |_| {
diff --git a/rust/cubestore/cubestore/src/import/mod.rs b/rust/cubestore/cubestore/src/import/mod.rs
index f994aeee54301..8a2c4b811504f 100644
--- a/rust/cubestore/cubestore/src/import/mod.rs
+++ b/rust/cubestore/cubestore/src/import/mod.rs
@@ -36,7 +36,7 @@ use crate::queryplanner::trace_data_loaded::DataLoadedSize;
 use crate::remotefs::RemoteFs;
 use crate::sql::timestamp_from_string;
 use crate::store::ChunkDataStore;
-use crate::streaming::StreamingService;
+//use crate::streaming::StreamingService;
 use crate::table::data::{append_row, create_array_builders};
 use crate::table::{Row, TableValue};
 use crate::util::batch_memory::columns_vec_buffer_size;
@@ -517,7 +517,7 @@ crate::di_service!(MockImportService, [ImportService]);
 
 pub struct ImportServiceImpl {
     meta_store: Arc<dyn MetaStore>,
-    streaming_service: Arc<dyn StreamingService>,
+    //streaming_service: Arc<dyn StreamingService>,
     chunk_store: Arc<dyn ChunkDataStore>,
     remote_fs: Arc<dyn RemoteFs>,
     config_obj: Arc<dyn ConfigObj>,
@@ -530,7 +530,7 @@ crate::di_service!(ImportServiceImpl, [ImportService]);
 impl ImportServiceImpl {
     pub fn new(
         meta_store: Arc<dyn MetaStore>,
-        streaming_service: Arc<dyn StreamingService>,
+        //streaming_service: Arc<dyn StreamingService>,
         chunk_store: Arc<dyn ChunkDataStore>,
         remote_fs: Arc<dyn RemoteFs>,
         config_obj: Arc<dyn ConfigObj>,
@@ -539,7 +539,7 @@ impl ImportServiceImpl {
     ) -> Arc<ImportServiceImpl> {
         Arc::new(ImportServiceImpl {
             meta_store,
-            streaming_service,
+            //streaming_service,
             chunk_store,
             remote_fs,
             config_obj,
@@ -823,13 +823,13 @@ impl ImportService for ImportServiceImpl {
                 table, location
             )));
         }
-        if Table::is_stream_location(location) {
+        /* if Table::is_stream_location(location) {
             self.streaming_service.stream_table(table, location).await?;
-        } else {
-            self.do_import(&table, *format, location, data_loaded_size.clone())
-                .await?;
-            self.drop_temp_uploads(&location).await?;
-        }
+        } else { */
+        self.do_import(&table, *format, location, data_loaded_size.clone())
+            .await?;
+        self.drop_temp_uploads(&location).await?;
+        //}
 
         Ok(())
     }
@@ -840,11 +840,11 @@ impl ImportService for ImportServiceImpl {
         location: &str,
     ) -> Result<(), CubeError> {
         let table = self.meta_store.get_table_by_id(table_id).await?;
-        if Table::is_stream_location(location) {
+        /* if Table::is_stream_location(location) {
             self.streaming_service
                 .validate_table_location(table, location)
                 .await?;
-        }
+        } */
         Ok(())
     }
 
diff --git a/rust/cubestore/cubestore/src/lib.rs b/rust/cubestore/cubestore/src/lib.rs
index bb9e124341848..c79c44fd4b2e7 100644
--- a/rust/cubestore/cubestore/src/lib.rs
+++ b/rust/cubestore/cubestore/src/lib.rs
@@ -44,7 +44,7 @@ pub mod scheduler;
 pub mod shared;
 pub mod sql;
 pub mod store;
-pub mod streaming;
+//pub mod streaming;
 pub mod sys;
 pub mod table;
 pub mod telemetry;
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
new file mode 100644
index 0000000000000..4da0b9b48f7b0
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
@@ -0,0 +1,611 @@
+use crate::cluster::{
+    pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams,
+};
+use crate::config::injection::DIService;
+use crate::config::ConfigObj;
+use crate::metastore::multi_index::MultiPartition;
+use crate::metastore::table::Table;
+use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
+use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
+use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
+use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
+use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
+use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags;
+use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots};
+use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions};
+use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan};
+use crate::queryplanner::trace_data_loaded::DataLoadedSize;
+use crate::store::DataFrame;
+use crate::table::data::rows_to_columns;
+use crate::table::parquet::CubestoreParquetMetadataCache;
+use crate::table::{Row, TableValue, TimestampValue};
+use crate::telemetry::suboptimal_query_plan_event;
+use crate::util::memory::MemoryHandler;
+use crate::{app_metrics, CubeError};
+use async_trait::async_trait;
+use core::fmt;
+use datafusion::arrow::array::AsArray;
+use datafusion::arrow::array::{
+    make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array,
+    Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray,
+    TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array,
+    UInt8Array,
+};
+use datafusion::arrow::compute::SortOptions;
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use datafusion::arrow::ipc::reader::StreamReader;
+use datafusion::arrow::ipc::writer::StreamWriter;
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::catalog::Session;
+use datafusion::common::ToDFSchema;
+use datafusion::config::TableParquetOptions;
+use datafusion::datasource::listing::PartitionedFile;
+use datafusion::datasource::object_store::ObjectStoreUrl;
+use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer;
+use datafusion::datasource::physical_plan::{
+    FileScanConfig, ParquetFileReaderFactory, ParquetSource,
+};
+use datafusion::datasource::{TableProvider, TableType};
+use datafusion::dfschema::internal_err;
+use datafusion::dfschema::not_impl_err;
+use datafusion::error::DataFusionError;
+use datafusion::error::Result as DFResult;
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, GroupsAccumulator, LogicalPlan};
+use datafusion::physical_expr::expressions::Column as DFColumn;
+use datafusion::physical_expr::LexOrdering;
+use datafusion::physical_expr::{self, GroupsAccumulatorAdapter};
+use datafusion::physical_expr::{
+    Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement,
+};
+use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics;
+use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate;
+use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
+use datafusion::physical_optimizer::join_selection::JoinSelection;
+use datafusion::physical_optimizer::limit_pushdown::LimitPushdown;
+use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation;
+use datafusion::physical_optimizer::output_requirements::OutputRequirements;
+use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown;
+use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
+use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
+use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
+use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::aggregates::*;
+use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
+use datafusion::physical_plan::empty::EmptyExec;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::projection::ProjectionExec;
+use datafusion::physical_plan::sorts::sort::SortExec;
+use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::udaf::AggregateFunctionExpr;
+use datafusion::physical_plan::{
+    collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr,
+    PlanProperties, SendableRecordBatchStream,
+};
+use datafusion::prelude::{and, SessionConfig, SessionContext};
+use datafusion_datasource::memory::MemorySourceConfig;
+use datafusion_datasource::source::DataSourceExec;
+use futures::ready;
+use futures::{
+    stream::{Stream, StreamExt},
+    Future,
+};
+use itertools::Itertools;
+use log::{debug, error, trace, warn};
+use mockall::automock;
+use serde_derive::{Deserialize, Serialize};
+use std::any::Any;
+use std::cmp::min;
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Debug, Formatter};
+use std::io::Cursor;
+use std::mem::take;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::time::SystemTime;
+use tarpc::context::current;
+use tracing::{instrument, Instrument};
+
+use super::InlineAggregateExec;
+use super::InlineAggregateMode;
+
+#[derive(Debug, Clone)]
+pub(crate) enum ExecutionState {
+    ReadingInput,
+    ProducingOutput(RecordBatch),
+    Done,
+}
+
+pub(crate) struct InlineAggregateStream {
+    schema: SchemaRef,
+    input: SendableRecordBatchStream,
+    mode: InlineAggregateMode,
+
+    aggregate_arguments: Vec<Vec<Arc<dyn PhysicalExpr>>>,
+
+    filter_expressions: Vec<Option<Arc<dyn PhysicalExpr>>>,
+
+    group_by: PhysicalGroupBy,
+
+    batch_size: usize,
+
+    exec_state: ExecutionState,
+
+    input_done: bool,
+
+    accumulators: Vec<Box<dyn GroupsAccumulator>>,
+    current_group_indices: Vec<usize>,
+}
+
+impl InlineAggregateStream {
+    pub fn new(
+        agg: &InlineAggregateExec,
+        context: Arc<TaskContext>,
+        partition: usize,
+    ) -> DFResult<Self> {
+        let agg_schema = Arc::clone(&agg.schema);
+        let agg_group_by = agg.group_by.clone();
+        let agg_filter_expr = agg.filter_expr.clone();
+
+        let batch_size = context.session_config().batch_size();
+        let input = agg.input.execute(partition, Arc::clone(&context))?;
+
+        let aggregate_exprs = agg.aggr_expr.clone();
+
+        // arguments for each aggregate, one vec of expressions per
+        // aggregate
+        let aggregate_arguments =
+            aggregate_expressions(&agg.aggr_expr, &agg.mode, agg_group_by.num_group_exprs())?;
+        // arguments for aggregating spilled data is the same as the one for final aggregation
+        let merging_aggregate_arguments = aggregate_expressions(
+            &agg.aggr_expr,
+            &InlineAggregateMode::Final,
+            agg_group_by.num_group_exprs(),
+        )?;
+
+        let filter_expressions = match agg.mode {
+            InlineAggregateMode::Partial => agg_filter_expr,
+            InlineAggregateMode::Final => {
+                vec![None; agg.aggr_expr.len()]
+            }
+        };
+
+        let accumulators: Vec<_> = aggregate_exprs
+            .iter()
+            .map(create_group_accumulator)
+            .collect::<DFResult<_>>()?;
+
+        let group_schema = agg_group_by.group_schema(&agg.input().schema())?;
+
+        let partial_agg_schema = create_schema(
+            &agg.input().schema(),
+            &agg_group_by,
+            &aggregate_exprs,
+            InlineAggregateMode::Partial,
+        )?;
+
+        let partial_agg_schema = Arc::new(partial_agg_schema);
+
+        let exec_state = ExecutionState::ReadingInput;
+        let current_group_indices = Vec::with_capacity(batch_size);
+
+        Ok(InlineAggregateStream {
+            schema: agg_schema,
+            input,
+            mode: agg.mode,
+            accumulators,
+            aggregate_arguments,
+            filter_expressions,
+            group_by: agg_group_by,
+            exec_state,
+            batch_size,
+            current_group_indices,
+            input_done: false,
+        })
+    }
+}
+
+fn create_schema(
+    input_schema: &Schema,
+    group_by: &PhysicalGroupBy,
+    aggr_expr: &[Arc<AggregateFunctionExpr>],
+    mode: InlineAggregateMode,
+) -> DFResult<Schema> {
+    let mut fields = Vec::with_capacity(group_by.num_output_exprs() + aggr_expr.len());
+    fields.extend(group_by.output_fields(input_schema)?);
+
+    match mode {
+        InlineAggregateMode::Partial => {
+            // in partial mode, the fields of the accumulator's state
+            for expr in aggr_expr {
+                fields.extend(expr.state_fields()?.iter().cloned());
+            }
+        }
+        InlineAggregateMode::Final => {
+            // in final mode, the field with the final result of the accumulator
+            for expr in aggr_expr {
+                fields.push(expr.field())
+            }
+        }
+    }
+
+    Ok(Schema::new_with_metadata(
+        fields,
+        input_schema.metadata().clone(),
+    ))
+}
+
+fn aggregate_expressions(
+    aggr_expr: &[Arc<AggregateFunctionExpr>],
+    mode: &InlineAggregateMode,
+    col_idx_base: usize,
+) -> DFResult<Vec<Vec<Arc<dyn PhysicalExpr>>>> {
+    match mode {
+        InlineAggregateMode::Partial => Ok(aggr_expr
+            .iter()
+            .map(|agg| {
+                let mut result = agg.expressions();
+                // Append ordering requirements to expressions' results. This
+                // way order sensitive aggregators can satisfy requirement
+                // themselves.
+                if let Some(ordering_req) = agg.order_bys() {
+                    result.extend(ordering_req.iter().map(|item| Arc::clone(&item.expr)));
+                }
+                result
+            })
+            .collect()),
+        InlineAggregateMode::Final => {
+            let mut col_idx_base = col_idx_base;
+            aggr_expr
+                .iter()
+                .map(|agg| {
+                    let exprs = merge_expressions(col_idx_base, agg)?;
+                    col_idx_base += exprs.len();
+                    Ok(exprs)
+                })
+                .collect()
+        }
+    }
+}
+
+fn merge_expressions(
+    index_base: usize,
+    expr: &AggregateFunctionExpr,
+) -> DFResult<Vec<Arc<dyn PhysicalExpr>>> {
+    expr.state_fields().map(|fields| {
+        fields
+            .iter()
+            .enumerate()
+            .map(|(idx, f)| Arc::new(DFColumn::new(f.name(), index_base + idx)) as _)
+            .collect()
+    })
+}
+
+pub(crate) fn create_group_accumulator(
+    agg_expr: &Arc<AggregateFunctionExpr>,
+) -> DFResult<Box<dyn GroupsAccumulator>> {
+    if agg_expr.groups_accumulator_supported() {
+        agg_expr.create_groups_accumulator()
+    } else {
+        let agg_expr_captured = Arc::clone(agg_expr);
+        let factory = move || agg_expr_captured.create_accumulator();
+        Ok(Box::new(GroupsAccumulatorAdapter::new(factory)))
+    }
+}
+
+impl Stream for InlineAggregateStream {
+    type Item = DFResult<RecordBatch>;
+
+    fn poll_next(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<Option<Self::Item>> {
+        loop {
+            match &self.exec_state {
+                ExecutionState::ReadingInput => 'reading_input: {
+                    match ready!(self.input.poll_next_unpin(cx)) {
+                        // New batch to aggregate in partial aggregation operator
+                        Some(Ok(batch)) if self.mode == InlineAggregateMode::Partial => {
+                            /* let timer = elapsed_compute.timer();
+                            let input_rows = batch.num_rows();
+
+                            // Do the grouping
+                            self.group_aggregate_batch(batch)?;
+
+                            self.update_skip_aggregation_probe(input_rows);
+
+                            // If we can begin emitting rows, do so,
+                            // otherwise keep consuming input
+                            assert!(!self.input_done);
+
+                            // If the number of group values equals or exceeds the soft limit,
+                            // emit all groups and switch to producing output
+                            if self.hit_soft_group_limit() {
+                                timer.done();
+                                self.set_input_done_and_produce_output()?;
+                                // make sure the exec_state just set is not overwritten below
+                                break 'reading_input;
+                            }
+
+                            if let Some(to_emit) = self.group_ordering.emit_to() {
+                                timer.done();
+                                if let Some(batch) = self.emit(to_emit, false)? {
+
+                                        ExecutionState::ProducingOutput(batch);
+                                };
+                                // make sure the exec_state just set is not overwritten below
+                                break 'reading_input;
+                            }
+
+                            self.emit_early_if_necessary()?;
+
+                            self.switch_to_skip_aggregation()?;
+
+                            timer.done(); */
+                            todo!()
+                        }
+
+                        // New batch to aggregate in terminal aggregation operator
+                        // (Final/FinalPartitioned/Single/SinglePartitioned)
+                        Some(Ok(batch)) => {
+                            /* let timer = elapsed_compute.timer();
+
+                            // Make sure we have enough capacity for `batch`, otherwise spill
+                            self.spill_previous_if_necessary(&batch)?;
+
+                            // Do the grouping
+
+
+                            // If we can begin emitting rows, do so,
+                            // otherwise keep consuming input
+                            assert!(!self.input_done);
+
+                            // If the number of group values equals or exceeds the soft limit,
+                            // emit all groups and switch to producing output
+                            if self.hit_soft_group_limit() {
+                                timer.done();
+                                self.set_input_done_and_produce_output()?;
+                                // make sure the exec_state just set is not overwritten below
+                                break 'reading_input;
+                            }
+
+                            if let Some(to_emit) = self.group_ordering.emit_to() {
+                                timer.done();
+                                if let Some(batch) = self.emit(to_emit, false)? {
+                                    self.exec_state =
+                                        ExecutionState::ProducingOutput(batch);
+                                };
+                                // make sure the exec_state just set is not overwritten below
+                                break 'reading_input;
+                            }
+
+                            timer.done(); */
+                            todo!()
+                        }
+
+                        // Found error from input stream
+                        Some(Err(e)) => {
+                            // inner had error, return to caller
+                            return Poll::Ready(Some(Err(e)));
+                        }
+
+                        // Found end from input stream
+                        None => {
+                            // inner is done, emit all rows and switch to producing output
+                            //self.set_input_done_and_produce_output()?;
+                            todo!()
+                        }
+                    }
+                }
+
+                ExecutionState::ProducingOutput(batch) => {
+                    // slice off a part of the batch, if needed
+                    /* let output_batch;
+                    let size = self.batch_size;
+                    (self.exec_state, output_batch) = if batch.num_rows() <= size {
+                        (
+                            if self.input_done {
+                                ExecutionState::Done
+                            }
+                            // In Partial aggregation, we also need to check
+                            // if we should trigger partial skipping
+                            else if self.mode == AggregateMode::Partial
+                                && self.should_skip_aggregation()
+                            {
+                                ExecutionState::SkippingAggregation
+                            } else {
+                                ExecutionState::ReadingInput
+                            },
+                            batch.clone(),
+                        )
+                    } else {
+                        // output first batch_size rows
+                        let size = self.batch_size;
+                        let num_remaining = batch.num_rows() - size;
+                        let remaining = batch.slice(size, num_remaining);
+                        let output = batch.slice(0, size);
+                        (ExecutionState::ProducingOutput(remaining), output)
+                    };
+                    // Empty record batches should not be emitted.
+                    // They need to be treated as  [`Option<RecordBatch>`]es and handled separately
+                    debug_assert!(output_batch.num_rows() > 0);
+                    return Poll::Ready(Some(Ok(
+                        output_batch.record_output(&self.baseline_metrics)
+                    ))); */
+                    todo!()
+                }
+
+                ExecutionState::Done => {
+                    // release the memory reservation since sending back output batch itself needs
+                    // some memory reservation, so make some room for it.
+                    /* self.clear_all();
+                    let _ = self.update_memory_reservation(); */
+                    return Poll::Ready(None);
+                }
+            }
+        }
+    }
+}
+
+impl InlineAggregateStream {
+    fn group_aggregate_batch(&mut self, batch: RecordBatch) -> DFResult<()> {
+        // Evaluate the grouping expressions
+        /* let group_by_values = evaluate_group_by(&self.group_by, &batch)?;
+
+        // Evaluate the aggregation expressions.
+        let input_values = evaluate_many(&self.aggregate_arguments, &batch)?;
+
+        // Evaluate the filter expressions, if any, against the inputs
+        let filter_values = evaluate_optional(&self.filter_expressions, &batch)?;
+
+        for group_values in &group_by_values {
+            // calculate the group indices for each input row
+            let starting_num_groups = self.group_values.len();
+            self.group_values
+                .intern(group_values, &mut self.current_group_indices)?;
+            let group_indices = &self.current_group_indices;
+
+            // Update ordering information if necessary
+            /* let total_num_groups = self.group_values.len();
+            if total_num_groups > starting_num_groups {
+                self.group_ordering
+                    .new_groups(group_values, group_indices, total_num_groups)?;
+            } */
+
+            // Gather the inputs to call the actual accumulator
+            let t = self
+                .accumulators
+                .iter_mut()
+                .zip(input_values.iter())
+                .zip(filter_values.iter());
+
+            for ((acc, values), opt_filter) in t {
+                let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean());
+
+                // Call the appropriate method on each aggregator with
+                // the entire input row and the relevant group indexes
+                match self.mode {
+                    InlineAggregateMode::Partial => {
+                        acc.update_batch(values, group_indices, opt_filter, total_num_groups)?;
+                    }
+                    _ => {
+                        if opt_filter.is_some() {
+                            return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage");
+                        }
+
+                        // if aggregation is over intermediate states,
+                        // use merge
+                        acc.merge_batch(values, group_indices, None, total_num_groups)?;
+                    }
+                }
+            }
+        } */
+        Ok(())
+    }
+}
+
+/// Evaluates expressions against a record batch.
+fn evaluate(expr: &[Arc<dyn PhysicalExpr>], batch: &RecordBatch) -> DFResult<Vec<ArrayRef>> {
+    expr.iter()
+        .map(|expr| {
+            expr.evaluate(batch)
+                .and_then(|v| v.into_array(batch.num_rows()))
+        })
+        .collect()
+}
+
+/// Evaluates expressions against a record batch.
+fn evaluate_many(
+    expr: &[Vec<Arc<dyn PhysicalExpr>>],
+    batch: &RecordBatch,
+) -> DFResult<Vec<Vec<ArrayRef>>> {
+    expr.iter().map(|expr| evaluate(expr, batch)).collect()
+}
+
+fn evaluate_optional(
+    expr: &[Option<Arc<dyn PhysicalExpr>>],
+    batch: &RecordBatch,
+) -> DFResult<Vec<Option<ArrayRef>>> {
+    expr.iter()
+        .map(|expr| {
+            expr.as_ref()
+                .map(|expr| {
+                    expr.evaluate(batch)
+                        .and_then(|v| v.into_array(batch.num_rows()))
+                })
+                .transpose()
+        })
+        .collect()
+}
+
+fn group_id_array(group: &[bool], batch: &RecordBatch) -> DFResult<ArrayRef> {
+    if group.len() > 64 {
+        return not_impl_err!("Grouping sets with more than 64 columns are not supported");
+    }
+    let group_id = group.iter().fold(0u64, |acc, &is_null| {
+        (acc << 1) | if is_null { 1 } else { 0 }
+    });
+    let num_rows = batch.num_rows();
+    if group.len() <= 8 {
+        Ok(Arc::new(UInt8Array::from(vec![group_id as u8; num_rows])))
+    } else if group.len() <= 16 {
+        Ok(Arc::new(UInt16Array::from(vec![group_id as u16; num_rows])))
+    } else if group.len() <= 32 {
+        Ok(Arc::new(UInt32Array::from(vec![group_id as u32; num_rows])))
+    } else {
+        Ok(Arc::new(UInt64Array::from(vec![group_id; num_rows])))
+    }
+}
+
+/// Evaluate a group by expression against a `RecordBatch`
+///
+/// Arguments:
+/// - `group_by`: the expression to evaluate
+/// - `batch`: the `RecordBatch` to evaluate against
+///
+/// Returns: A Vec of Vecs of Array of results
+/// The outer Vec appears to be for grouping sets
+/// The inner Vec contains the results per expression
+/// The inner-inner Array contains the results per row
+fn evaluate_group_by(
+    group_by: &PhysicalGroupBy,
+    batch: &RecordBatch,
+) -> DFResult<Vec<Vec<ArrayRef>>> {
+    let exprs: Vec<ArrayRef> = group_by
+        .expr()
+        .iter()
+        .map(|(expr, _)| {
+            let value = expr.evaluate(batch)?;
+            value.into_array(batch.num_rows())
+        })
+        .collect::<DFResult<Vec<_>>>()?;
+
+    let null_exprs: Vec<ArrayRef> = group_by
+        .null_expr()
+        .iter()
+        .map(|(expr, _)| {
+            let value = expr.evaluate(batch)?;
+            value.into_array(batch.num_rows())
+        })
+        .collect::<DFResult<Vec<_>>>()?;
+
+    group_by
+        .groups()
+        .iter()
+        .map(|group| {
+            let mut group_values = Vec::with_capacity(group_by.num_group_exprs());
+            group_values.extend(group.iter().enumerate().map(|(idx, is_null)| {
+                if *is_null {
+                    Arc::clone(&null_exprs[idx])
+                } else {
+                    Arc::clone(&exprs[idx])
+                }
+            }));
+            if !group_by.is_single() {
+                group_values.push(group_id_array(group, batch)?);
+            }
+            Ok(group_values)
+        })
+        .collect()
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
new file mode 100644
index 0000000000000..6f78a7ce9b375
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
@@ -0,0 +1,281 @@
+mod inline_aggregate_stream;
+mod sorted_group_values;
+use crate::cluster::{
+    pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams,
+};
+use crate::config::injection::DIService;
+use crate::config::ConfigObj;
+use crate::metastore::multi_index::MultiPartition;
+use crate::metastore::table::Table;
+use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
+use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
+use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
+use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
+use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
+use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags;
+use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots};
+use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions};
+use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan};
+use crate::queryplanner::trace_data_loaded::DataLoadedSize;
+use crate::store::DataFrame;
+use crate::table::data::rows_to_columns;
+use crate::table::parquet::CubestoreParquetMetadataCache;
+use crate::table::{Row, TableValue, TimestampValue};
+use crate::telemetry::suboptimal_query_plan_event;
+use crate::util::memory::MemoryHandler;
+use crate::{app_metrics, CubeError};
+use async_trait::async_trait;
+use core::fmt;
+use datafusion::arrow::array::{
+    make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array,
+    Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray,
+    TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array,
+    UInt8Array,
+};
+use datafusion::arrow::compute::SortOptions;
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use datafusion::arrow::ipc::reader::StreamReader;
+use datafusion::arrow::ipc::writer::StreamWriter;
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::catalog::Session;
+use datafusion::common::stats::Precision;
+use datafusion::common::{Statistics, ToDFSchema};
+use datafusion::config::TableParquetOptions;
+use datafusion::datasource::listing::PartitionedFile;
+use datafusion::datasource::object_store::ObjectStoreUrl;
+use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer;
+use datafusion::datasource::physical_plan::{
+    FileScanConfig, ParquetFileReaderFactory, ParquetSource,
+};
+use datafusion::datasource::{TableProvider, TableType};
+use datafusion::dfschema::{internal_err, not_impl_err};
+use datafusion::error::DataFusionError;
+use datafusion::error::Result as DFResult;
+use datafusion::execution::TaskContext;
+use datafusion::logical_expr::{Expr, LogicalPlan};
+use datafusion::physical_expr;
+use datafusion::physical_expr::aggregate::AggregateFunctionExpr;
+use datafusion::physical_expr::LexOrdering;
+use datafusion::physical_expr::{
+    Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement,
+};
+use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics;
+use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate;
+use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
+use datafusion::physical_optimizer::join_selection::JoinSelection;
+use datafusion::physical_optimizer::limit_pushdown::LimitPushdown;
+use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation;
+use datafusion::physical_optimizer::output_requirements::OutputRequirements;
+use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown;
+use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
+use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
+use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
+use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
+use datafusion::physical_plan::empty::EmptyExec;
+use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType};
+use datafusion::physical_plan::metrics::MetricsSet;
+use datafusion::physical_plan::projection::ProjectionExec;
+use datafusion::physical_plan::sorts::sort::SortExec;
+use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{aggregates::*, InputOrderMode};
+use datafusion::physical_plan::{
+    collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr,
+    PlanProperties, SendableRecordBatchStream,
+};
+use datafusion::prelude::{and, SessionConfig, SessionContext};
+use datafusion_datasource::memory::MemorySourceConfig;
+use datafusion_datasource::source::DataSourceExec;
+use futures_util::{stream, StreamExt, TryStreamExt};
+use itertools::Itertools;
+use log::{debug, error, trace, warn};
+use mockall::automock;
+use serde_derive::{Deserialize, Serialize};
+use std::any::Any;
+use std::cmp::min;
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Debug, Formatter};
+use std::io::Cursor;
+use std::mem::take;
+use std::sync::Arc;
+use std::time::SystemTime;
+use tracing::{instrument, Instrument};
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum InlineAggregateMode {
+    Partial,
+    Final,
+}
+
+#[derive(Debug, Clone)]
+pub struct InlineAggregateExec {
+    mode: InlineAggregateMode,
+    /// Group by expressions
+    group_by: PhysicalGroupBy,
+    /// Aggregate expressions
+    aggr_expr: Vec<Arc<AggregateFunctionExpr>>,
+    /// FILTER (WHERE clause) expression for each aggregate expression
+    filter_expr: Vec<Option<Arc<dyn PhysicalExpr>>>,
+    /// Set if the output of this aggregation is truncated by a upstream sort/limit clause
+    limit: Option<usize>,
+    /// Input plan, could be a partial aggregate or the input to the aggregate
+    pub input: Arc<dyn ExecutionPlan>,
+    /// Schema after the aggregate is applied
+    schema: SchemaRef,
+    /// Input schema before any aggregation is applied. For partial aggregate this will be the
+    /// same as input.schema() but for the final aggregate it will be the same as the input
+    /// to the partial aggregate, i.e., partial and final aggregates have same `input_schema`.
+    /// We need the input schema of partial aggregate to be able to deserialize aggregate
+    /// expressions from protobuf for final aggregate.
+    pub input_schema: SchemaRef,
+    cache: PlanProperties,
+}
+
+impl InlineAggregateExec {
+    pub fn try_new_from_aggregate(aggregate: &AggregateExec) -> Option<Self> {
+        if matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) {
+            return None;
+        }
+        let mode = match aggregate.mode() {
+            AggregateMode::Partial => InlineAggregateMode::Partial,
+            AggregateMode::Final => InlineAggregateMode::Final,
+            _ => return None,
+        };
+        let group_by = aggregate.group_expr().clone();
+        let aggr_expr = aggregate.aggr_expr().iter().cloned().collect();
+        let filter_expr = aggregate.filter_expr().iter().cloned().collect();
+        let limit = aggregate.limit().clone();
+        let input = aggregate.input().clone();
+        let schema = aggregate.schema().clone();
+        let input_schema = aggregate.input_schema().clone();
+        let cache = aggregate.cache().clone();
+        Some(Self {
+            mode,
+            group_by,
+            aggr_expr,
+            filter_expr,
+            limit,
+            input,
+            schema,
+            input_schema,
+            cache,
+        })
+    }
+
+    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
+        &self.input
+    }
+}
+
+impl DisplayAs for InlineAggregateExec {
+    fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match t {
+            DisplayFormatType::Default | DisplayFormatType::Verbose => {
+                write!(f, "InlineAggregateExec: mode={:?}", self.mode)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl ExecutionPlan for InlineAggregateExec {
+    fn name(&self) -> &'static str {
+        "InlineAggregateExec"
+    }
+
+    /// Return a reference to Any that can be used for down-casting
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.cache
+    }
+
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        match &self.mode {
+            InlineAggregateMode::Partial => {
+                vec![Distribution::UnspecifiedDistribution]
+            }
+            InlineAggregateMode::Final => {
+                vec![Distribution::SinglePartition]
+            }
+        }
+    }
+
+    fn required_input_ordering(&self) -> Vec<Option<LexRequirement>> {
+        vec![]
+    }
+
+    fn maintains_input_order(&self) -> Vec<bool> {
+        vec![true]
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        let result = Self {
+            mode: self.mode,
+            group_by: self.group_by.clone(),
+            aggr_expr: self.aggr_expr.clone(),
+            filter_expr: self.filter_expr.clone(),
+            limit: self.limit.clone(),
+            input: children[0].clone(),
+            schema: self.schema.clone(),
+            input_schema: self.input_schema.clone(),
+            cache: self.cache.clone(),
+        };
+        Ok(Arc::new(result))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DFResult<SendableRecordBatchStream> {
+        /* self.execute_typed(partition, context)
+        .map(|stream| stream.into()) */
+        todo!()
+    }
+
+    fn metrics(&self) -> Option<MetricsSet> {
+        None
+    }
+
+    fn statistics(&self) -> DFResult<Statistics> {
+        let column_statistics = Statistics::unknown_column(&self.schema());
+        // When the input row count is 0 or 1, we can adopt that statistic keeping its reliability.
+        // When it is larger than 1, we degrade the precision since it may decrease after aggregation.
+        let num_rows = if let Some(value) = self.input().statistics()?.num_rows.get_value() {
+            if *value > 1 {
+                self.input().statistics()?.num_rows.to_inexact()
+            } else if *value == 0 {
+                // Aggregation on an empty table creates a null row.
+                self.input()
+                    .statistics()?
+                    .num_rows
+                    .add(&Precision::Exact(1))
+            } else {
+                // num_rows = 1 case
+                self.input().statistics()?.num_rows
+            }
+        } else {
+            Precision::Absent
+        };
+        Ok(Statistics {
+            num_rows,
+            column_statistics,
+            total_byte_size: Precision::Absent,
+        })
+    }
+
+    fn cardinality_effect(&self) -> CardinalityEffect {
+        CardinalityEffect::LowerEqual
+    }
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
new file mode 100644
index 0000000000000..815274af16a85
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -0,0 +1,304 @@
+use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
+
+use std::mem::{self, size_of};
+
+use datafusion::arrow::array::{Array, ArrayRef, RecordBatch};
+use datafusion::arrow::compute::cast;
+use datafusion::arrow::datatypes::{
+    BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, Float64Type,
+    Int16Type, Int32Type, Int64Type, Int8Type, Schema, SchemaRef, StringViewType,
+    Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit,
+    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
+    TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+};
+use datafusion::dfschema::internal_err;
+use datafusion::dfschema::not_impl_err;
+use datafusion::error::Result as DFResult;
+use datafusion::physical_expr::binary_map::OutputType;
+use datafusion::physical_plan::aggregates::group_values::multi_group_by::{
+    ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder,
+};
+
+pub struct SortedGroupValues {
+    /// The output schema
+    schema: SchemaRef,
+    group_values: Vec<Box<dyn GroupColumn>>,
+    rows_inds: Vec<usize>,
+    equal_to_results: Vec<bool>,
+}
+
+impl SortedGroupValues {
+    pub fn try_new(schema: SchemaRef) -> DFResult<Self> {
+        Ok(Self {
+            schema,
+            group_values: vec![],
+            rows_inds: vec![],
+            equal_to_results: vec![],
+        })
+    }
+
+    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+        /* let n_rows = cols[0].len();
+        groups.clear();
+
+        if n_rows == 0 {
+            return Ok(());
+        }
+
+        let first_group_idx = self.make_new_group_if_needed(cols, 0);
+        groups.push(first_group_idx);
+
+        if n_rows == 1 {
+            return Ok(());
+        }
+
+        if self.rows_inds.len() < n_rows {
+            let old_len = self.rows_inds.len();
+            self.rows_inds.extend(old_len..n_rows);
+        }
+
+        self.equal_to_results.fill(true);
+        self.equal_to_results.resize(n_rows - 1, true);
+
+        let lhs_rows = &self.rows_inds[0..n_rows - 1];
+        let rhs_rows = &self.rows_inds[1..n_rows];
+        for (col_idx, group_col) in self.group_values.iter().enumerate() {
+            cols[col_idx].vectorized_equal_to(
+                lhs_rows,
+                &cols[col_idx],
+                rhs_rows,
+                &mut self.equal_to_results,
+            );
+        }
+        println!("!!!!! AAAAAAAAAA");
+        let mut current_group_idx = first_group_idx;
+        for i in 0..n_rows - 1 {
+            if !self.equal_to_results[i] {
+                for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
+                    group_value.append_val(&cols[col_idx], i + 1);
+                }
+                current_group_idx = self.group_values[0].len() - 1;
+            }
+            groups.push(current_group_idx);
+        }
+        println!("!!!!! BBBBBBB");
+        Ok(()) */
+        Ok(())
+    }
+
+    fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
+        let new_group_needed = if self.group_values[0].len() == 0 {
+            true
+        } else {
+            self.group_values.iter().enumerate().any(|(i, group_val)| {
+                !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
+            })
+        };
+        if new_group_needed {
+            for (i, group_value) in self.group_values.iter_mut().enumerate() {
+                group_value.append_val(&cols[i], row);
+            }
+        }
+        self.group_values[0].len() - 1
+    }
+}
+
+/// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v
+///
+/// Arguments:
+/// `$v`: the vector to push the new builder into
+/// `$nullable`: whether the input can contains nulls
+/// `$t`: the primitive type of the builder
+///
+macro_rules! instantiate_primitive {
+    ($v:expr, $nullable:expr, $t:ty, $data_type:ident) => {
+        if $nullable {
+            let b = PrimitiveGroupValueBuilder::<$t, true>::new($data_type.to_owned());
+            $v.push(Box::new(b) as _)
+        } else {
+            let b = PrimitiveGroupValueBuilder::<$t, false>::new($data_type.to_owned());
+            $v.push(Box::new(b) as _)
+        }
+    };
+}
+
+/* impl GroupValues for SortedGroupValues {
+    fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> {
+        if self.group_values.is_empty() {
+            let mut v = Vec::with_capacity(cols.len());
+
+            for f in self.schema.fields().iter() {
+                let nullable = f.is_nullable();
+                let data_type = f.data_type();
+                match data_type {
+                    &DataType::Int8 => {
+                        instantiate_primitive!(v, nullable, Int8Type, data_type)
+                    }
+                    &DataType::Int16 => {
+                        instantiate_primitive!(v, nullable, Int16Type, data_type)
+                    }
+                    &DataType::Int32 => {
+                        instantiate_primitive!(v, nullable, Int32Type, data_type)
+                    }
+                    &DataType::Int64 => {
+                        instantiate_primitive!(v, nullable, Int64Type, data_type)
+                    }
+                    &DataType::UInt8 => {
+                        instantiate_primitive!(v, nullable, UInt8Type, data_type)
+                    }
+                    &DataType::UInt16 => {
+                        instantiate_primitive!(v, nullable, UInt16Type, data_type)
+                    }
+                    &DataType::UInt32 => {
+                        instantiate_primitive!(v, nullable, UInt32Type, data_type)
+                    }
+                    &DataType::UInt64 => {
+                        instantiate_primitive!(v, nullable, UInt64Type, data_type)
+                    }
+                    &DataType::Float32 => {
+                        instantiate_primitive!(v, nullable, Float32Type, data_type)
+                    }
+                    &DataType::Float64 => {
+                        instantiate_primitive!(v, nullable, Float64Type, data_type)
+                    }
+                    &DataType::Date32 => {
+                        instantiate_primitive!(v, nullable, Date32Type, data_type)
+                    }
+                    &DataType::Date64 => {
+                        instantiate_primitive!(v, nullable, Date64Type, data_type)
+                    }
+                    &DataType::Time32(t) => match t {
+                        TimeUnit::Second => {
+                            instantiate_primitive!(v, nullable, Time32SecondType, data_type)
+                        }
+                        TimeUnit::Millisecond => {
+                            instantiate_primitive!(v, nullable, Time32MillisecondType, data_type)
+                        }
+                        _ => {}
+                    },
+                    &DataType::Time64(t) => match t {
+                        TimeUnit::Microsecond => {
+                            instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type)
+                        }
+                        TimeUnit::Nanosecond => {
+                            instantiate_primitive!(v, nullable, Time64NanosecondType, data_type)
+                        }
+                        _ => {}
+                    },
+                    &DataType::Timestamp(t, _) => match t {
+                        TimeUnit::Second => {
+                            instantiate_primitive!(v, nullable, TimestampSecondType, data_type)
+                        }
+                        TimeUnit::Millisecond => {
+                            instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type)
+                        }
+                        TimeUnit::Microsecond => {
+                            instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type)
+                        }
+                        TimeUnit::Nanosecond => {
+                            instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type)
+                        }
+                    },
+                    &DataType::Decimal128(_, _) => {
+                        instantiate_primitive! {
+                            v,
+                            nullable,
+                            Decimal128Type,
+                            data_type
+                        }
+                    }
+                    &DataType::Utf8 => {
+                        let b = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
+                        v.push(Box::new(b) as _)
+                    }
+                    &DataType::LargeUtf8 => {
+                        let b = ByteGroupValueBuilder::<i64>::new(OutputType::Utf8);
+                        v.push(Box::new(b) as _)
+                    }
+                    &DataType::Binary => {
+                        let b = ByteGroupValueBuilder::<i32>::new(OutputType::Binary);
+                        v.push(Box::new(b) as _)
+                    }
+                    &DataType::LargeBinary => {
+                        let b = ByteGroupValueBuilder::<i64>::new(OutputType::Binary);
+                        v.push(Box::new(b) as _)
+                    }
+                    &DataType::Utf8View => {
+                        let b = ByteViewGroupValueBuilder::<StringViewType>::new();
+                        v.push(Box::new(b) as _)
+                    }
+                    &DataType::BinaryView => {
+                        let b = ByteViewGroupValueBuilder::<BinaryViewType>::new();
+                        v.push(Box::new(b) as _)
+                    }
+                    dt => return not_impl_err!("{dt} not supported in GroupValuesColumn"),
+                }
+            }
+            self.group_values = v;
+        }
+        self.intern_impl(cols, groups)
+    }
+
+    fn size(&self) -> usize {
+        let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum();
+        group_values_size
+    }
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    fn len(&self) -> usize {
+        if self.group_values.is_empty() {
+            return 0;
+        }
+
+        self.group_values[0].len()
+    }
+
+    fn emit(&mut self) -> Result<Vec<ArrayRef>> {
+        /* let mut output = match emit_to {
+            EmitTo::All => {
+                let group_values = mem::take(&mut self.group_values);
+                debug_assert!(self.group_values.is_empty());
+
+                group_values
+                    .into_iter()
+                    .map(|v| v.build())
+                    .collect::<Vec<_>>()
+            }
+            EmitTo::First(n) => {
+                let output = self
+                    .group_values
+                    .iter_mut()
+                    .map(|v| v.take_n(n))
+                    .collect::<Vec<_>>();
+
+                output
+            }
+        };
+
+        // TODO: Materialize dictionaries in group keys (#7647)
+        for (field, array) in self.schema.fields.iter().zip(&mut output) {
+            let expected = field.data_type();
+            if let DataType::Dictionary(_, v) = expected {
+                let actual = array.data_type();
+                if v.as_ref() != actual {
+                    return Err(DataFusionError::Internal(format!(
+                        "Converted group rows expected dictionary of {v} got {actual}"
+                    )));
+                }
+                *array = cast(array.as_ref(), expected)?;
+            }
+        }
+
+        Ok(output) */
+        todo!()
+    }
+
+    fn clear_shrink(&mut self, batch: &RecordBatch) {
+        self.group_values.clear();
+        self.rows_inds.clear();
+        self.equal_to_results.clear();
+    }
+} */
diff --git a/rust/cubestore/cubestore/src/queryplanner/mod.rs b/rust/cubestore/cubestore/src/queryplanner/mod.rs
index cd86bb5629346..464337c5bb5f3 100644
--- a/rust/cubestore/cubestore/src/queryplanner/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/mod.rs
@@ -30,6 +30,7 @@ mod rolling;
 mod test_utils;
 pub mod udf_xirr;
 pub mod udfs;
+mod inline_aggregate;
 
 use crate::cachestore::CacheStore;
 use crate::config::injection::DIService;
@@ -310,7 +311,7 @@ impl QueryPlannerImpl {
             .execution
             .dont_parallelize_sort_preserving_merge_exec_inputs = true;
         config.options_mut().execution.batch_size = Self::EXECUTION_BATCH_SIZE;
-        config.options_mut().execution.parquet.split_row_group_reads = true;
+        config.options_mut().execution.parquet.split_row_group_reads = false;
 
         // TODO upgrade DF: build SessionContexts consistently
         let state = Self::minimal_session_state_from_final_config(config)
diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
index 8478c5d67e497..fac32b4f8e63c 100644
--- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
@@ -43,7 +43,7 @@ use crate::queryplanner::topk::{
     AggregateTopKExec, ClusterAggregateTopKLower, ClusterAggregateTopKUpper,
 };
 use crate::queryplanner::{CubeTableLogical, InfoSchemaTableProvider, QueryPlan};
-use crate::streaming::topic_table_provider::TopicTableProvider;
+//use crate::streaming::topic_table_provider::TopicTableProvider;
 use datafusion::physical_plan::empty::EmptyExec;
 use datafusion::physical_plan::expressions::Column;
 use datafusion::physical_plan::joins::{HashJoinExec, SortMergeJoinExec};
@@ -481,8 +481,6 @@ fn pp_source(t: Arc<dyn TableProvider>) -> String {
         .downcast_ref::<InfoSchemaQueryCacheTableProvider>()
     {
         "InfoSchemaQueryCacheTableProvider".to_string()
-    } else if let Some(_) = t.as_any().downcast_ref::<TopicTableProvider>() {
-        "TopicTableProvider".to_string()
     } else {
         panic!("unknown table provider");
     }
diff --git a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
index 47b5b9f7caa34..7072ad8c59180 100644
--- a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
@@ -486,7 +486,7 @@ impl QueryExecutorImpl {
             .with_target_partitions(2)
             .with_prefer_existing_sort(true)
             .with_round_robin_repartition(false);
-        config.options_mut().execution.parquet.split_row_group_reads = true;
+        config.options_mut().execution.parquet.split_row_group_reads = false;
         config.options_mut().optimizer.prefer_hash_join = false;
         // Redundant with the commented CoalesceBatches::new() line in `Self::optimizer_rules`
         config.options_mut().execution.coalesce_batches = false;
@@ -1178,6 +1178,7 @@ impl ExecutionPlan for CubeTableExec {
         mut partition: usize,
         context: Arc<TaskContext>,
     ) -> Result<SendableRecordBatchStream, DataFusionError> {
+        println!("!!! Table exec: {}, {}", self.partition_execs.len(), partition);
         let exec = self
             .partition_execs
             .iter()
diff --git a/rust/cubestore/rust-toolchain.toml b/rust/cubestore/rust-toolchain.toml
index 935f99e36558c..ad8132da3e1bc 100644
--- a/rust/cubestore/rust-toolchain.toml
+++ b/rust/cubestore/rust-toolchain.toml
@@ -1,4 +1,4 @@
 [toolchain]
-channel = "nightly-2024-10-30"
+channel = "nightly-2025-08-01"
 components = ["rustfmt", "clippy"]
 profile = "minimal"

From d14c4ad48903d429457da7e0dfbb1eee19bebe1e Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Mon, 20 Oct 2025 16:15:15 +0200
Subject: [PATCH 2/9] in work

---
 .../inline_aggregate/sorted_group_values.rs   | 166 +++++++++---------
 1 file changed, 82 insertions(+), 84 deletions(-)

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index 815274af16a85..47ea718a804d5 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -27,82 +27,6 @@ pub struct SortedGroupValues {
     equal_to_results: Vec<bool>,
 }
 
-impl SortedGroupValues {
-    pub fn try_new(schema: SchemaRef) -> DFResult<Self> {
-        Ok(Self {
-            schema,
-            group_values: vec![],
-            rows_inds: vec![],
-            equal_to_results: vec![],
-        })
-    }
-
-    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
-        /* let n_rows = cols[0].len();
-        groups.clear();
-
-        if n_rows == 0 {
-            return Ok(());
-        }
-
-        let first_group_idx = self.make_new_group_if_needed(cols, 0);
-        groups.push(first_group_idx);
-
-        if n_rows == 1 {
-            return Ok(());
-        }
-
-        if self.rows_inds.len() < n_rows {
-            let old_len = self.rows_inds.len();
-            self.rows_inds.extend(old_len..n_rows);
-        }
-
-        self.equal_to_results.fill(true);
-        self.equal_to_results.resize(n_rows - 1, true);
-
-        let lhs_rows = &self.rows_inds[0..n_rows - 1];
-        let rhs_rows = &self.rows_inds[1..n_rows];
-        for (col_idx, group_col) in self.group_values.iter().enumerate() {
-            cols[col_idx].vectorized_equal_to(
-                lhs_rows,
-                &cols[col_idx],
-                rhs_rows,
-                &mut self.equal_to_results,
-            );
-        }
-        println!("!!!!! AAAAAAAAAA");
-        let mut current_group_idx = first_group_idx;
-        for i in 0..n_rows - 1 {
-            if !self.equal_to_results[i] {
-                for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
-                    group_value.append_val(&cols[col_idx], i + 1);
-                }
-                current_group_idx = self.group_values[0].len() - 1;
-            }
-            groups.push(current_group_idx);
-        }
-        println!("!!!!! BBBBBBB");
-        Ok(()) */
-        Ok(())
-    }
-
-    fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
-        let new_group_needed = if self.group_values[0].len() == 0 {
-            true
-        } else {
-            self.group_values.iter().enumerate().any(|(i, group_val)| {
-                !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
-            })
-        };
-        if new_group_needed {
-            for (i, group_value) in self.group_values.iter_mut().enumerate() {
-                group_value.append_val(&cols[i], row);
-            }
-        }
-        self.group_values[0].len() - 1
-    }
-}
-
 /// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v
 ///
 /// Arguments:
@@ -122,8 +46,17 @@ macro_rules! instantiate_primitive {
     };
 }
 
-/* impl GroupValues for SortedGroupValues {
-    fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> {
+impl SortedGroupValues {
+    pub fn try_new(schema: SchemaRef) -> DFResult<Self> {
+        Ok(Self {
+            schema,
+            group_values: vec![],
+            rows_inds: vec![],
+            equal_to_results: vec![],
+        })
+    }
+
+    pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
         if self.group_values.is_empty() {
             let mut v = Vec::with_capacity(cols.len());
 
@@ -231,7 +164,7 @@ macro_rules! instantiate_primitive {
                         let b = ByteViewGroupValueBuilder::<BinaryViewType>::new();
                         v.push(Box::new(b) as _)
                     }
-                    dt => return not_impl_err!("{dt} not supported in GroupValuesColumn"),
+                    dt => return not_impl_err!("{dt} not supported in SortedGroupValues"),
                 }
             }
             self.group_values = v;
@@ -239,16 +172,16 @@ macro_rules! instantiate_primitive {
         self.intern_impl(cols, groups)
     }
 
-    fn size(&self) -> usize {
+    pub fn size(&self) -> usize {
         let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum();
         group_values_size
     }
 
-    fn is_empty(&self) -> bool {
+    pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
-    fn len(&self) -> usize {
+    pub fn len(&self) -> usize {
         if self.group_values.is_empty() {
             return 0;
         }
@@ -256,7 +189,7 @@ macro_rules! instantiate_primitive {
         self.group_values[0].len()
     }
 
-    fn emit(&mut self) -> Result<Vec<ArrayRef>> {
+    pub fn emit(&mut self) -> DFResult<Vec<ArrayRef>> {
         /* let mut output = match emit_to {
             EmitTo::All => {
                 let group_values = mem::take(&mut self.group_values);
@@ -301,4 +234,69 @@ macro_rules! instantiate_primitive {
         self.rows_inds.clear();
         self.equal_to_results.clear();
     }
-} */
+
+    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+        /* let n_rows = cols[0].len();
+        groups.clear();
+
+        if n_rows == 0 {
+            return Ok(());
+        }
+
+        let first_group_idx = self.make_new_group_if_needed(cols, 0);
+        groups.push(first_group_idx);
+
+        if n_rows == 1 {
+            return Ok(());
+        }
+
+        if self.rows_inds.len() < n_rows {
+            let old_len = self.rows_inds.len();
+            self.rows_inds.extend(old_len..n_rows);
+        }
+
+        self.equal_to_results.fill(true);
+        self.equal_to_results.resize(n_rows - 1, true);
+
+        let lhs_rows = &self.rows_inds[0..n_rows - 1];
+        let rhs_rows = &self.rows_inds[1..n_rows];
+        for (col_idx, group_col) in self.group_values.iter().enumerate() {
+            cols[col_idx].vectorized_equal_to(
+                lhs_rows,
+                &cols[col_idx],
+                rhs_rows,
+                &mut self.equal_to_results,
+            );
+        }
+        println!("!!!!! AAAAAAAAAA");
+        let mut current_group_idx = first_group_idx;
+        for i in 0..n_rows - 1 {
+            if !self.equal_to_results[i] {
+                for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
+                    group_value.append_val(&cols[col_idx], i + 1);
+                }
+                current_group_idx = self.group_values[0].len() - 1;
+            }
+            groups.push(current_group_idx);
+        }
+        println!("!!!!! BBBBBBB");
+        Ok(()) */
+        Ok(())
+    }
+
+    fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
+        let new_group_needed = if self.group_values[0].len() == 0 {
+            true
+        } else {
+            self.group_values.iter().enumerate().any(|(i, group_val)| {
+                !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
+            })
+        };
+        if new_group_needed {
+            for (i, group_value) in self.group_values.iter_mut().enumerate() {
+                group_value.append_val(&cols[i], row);
+            }
+        }
+        self.group_values[0].len() - 1
+    }
+}

From 50517a5e7773d0b1a318aa250b600b4b2082cff5 Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Mon, 20 Oct 2025 16:54:19 +0200
Subject: [PATCH 3/9] in work

---
 .../inline_aggregate/column_comparator.rs     | 262 ++++++++++++++++++
 .../src/queryplanner/inline_aggregate/mod.rs  |   1 +
 2 files changed, 263 insertions(+)
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
new file mode 100644
index 0000000000000..df4e0c12a4e73
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
@@ -0,0 +1,262 @@
+use datafusion::arrow::array::*;
+use datafusion::arrow::datatypes::*;
+use std::marker::PhantomData;
+
+/// Trait for comparing adjacent rows in an array to detect group boundaries.
+/// Used in sorted group-by operations to efficiently find where groups change.
+pub trait ColumnComparator: Send + Sync {
+    /// Compare adjacent rows in the column, updating `equal_results`.
+    ///
+    /// For each index i in 0..equal_results.len():
+    /// - If equal_results[i] is true, compares row[i] with row[i+1]
+    /// - Sets equal_results[i] to false if rows differ (group boundary)
+    /// - Leaves equal_results[i] unchanged if already false (short-circuit)
+    fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]);
+}
+
+/// Comparator for primitive types (integers, floats, decimals, dates, timestamps).
+///
+/// Uses const generic NULLABLE parameter to eliminate null-checking overhead
+/// for NOT NULL columns at compile time.
+pub struct PrimitiveComparator<T: ArrowPrimitiveType, const NULLABLE: bool>
+where
+    T::Native: PartialEq,
+    T: Send + Sync,
+{
+    _phantom: PhantomData<T>,
+}
+
+impl<T: ArrowPrimitiveType, const NULLABLE: bool> PrimitiveComparator<T, NULLABLE>
+where
+    T::Native: PartialEq,
+    T: Send + Sync,
+{
+    pub fn new() -> Self {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T: ArrowPrimitiveType, const NULLABLE: bool> ColumnComparator
+    for PrimitiveComparator<T, NULLABLE>
+where
+    T::Native: PartialEq,
+    T: Send + Sync,
+{
+    #[inline]
+    fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) {
+        let array = col.as_primitive::<T>();
+
+        let values = array.values();
+
+        if NULLABLE {
+            // Nullable column - check if there are actually any nulls
+            if array.null_count() == 0 {
+                // Fast path: column is nullable but this batch has no nulls
+                for i in 0..equal_results.len() {
+                    if equal_results[i] {
+                        equal_results[i] = values[i] == values[i + 1];
+                    }
+                }
+            } else {
+                // Slow path: need to check null bitmap
+                let nulls = array.nulls().expect("null_count > 0 but no nulls bitmap");
+                for i in 0..equal_results.len() {
+                    if equal_results[i] {
+                        let null1 = nulls.is_null(i);
+                        let null2 = nulls.is_null(i + 1);
+
+                        // Both must be null or both must be non-null with equal values
+                        equal_results[i] =
+                            (null1 == null2) && (null1 || values[i] == values[i + 1]);
+                    }
+                }
+            }
+        } else {
+            // NOT NULL column - no null checks needed, compiler will optimize this aggressively
+            for i in 0..equal_results.len() {
+                if equal_results[i] {
+                    equal_results[i] = values[i] == values[i + 1];
+                }
+            }
+        }
+    }
+}
+
+/// Comparator for byte array types (Utf8, LargeUtf8, Binary, LargeBinary).
+///
+/// Uses generic over ByteArrayType to handle both i32 and i64 offset variants.
+pub struct ByteArrayComparator<T: ByteArrayType + Send + Sync, const NULLABLE: bool> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: ByteArrayType + Send + Sync, const NULLABLE: bool> ByteArrayComparator<T, NULLABLE> {
+    pub fn new() -> Self {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T: ByteArrayType + Send + Sync, const NULLABLE: bool> ColumnComparator
+    for ByteArrayComparator<T, NULLABLE>
+where
+    T::Native: PartialEq,
+{
+    #[inline]
+    fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) {
+        let array = col.as_bytes::<T>();
+
+        if NULLABLE {
+            if array.null_count() == 0 {
+                // Fast path: no nulls in this batch
+                for i in 0..equal_results.len() {
+                    if equal_results[i] {
+                        equal_results[i] = array.value(i) == array.value(i + 1);
+                    }
+                }
+            } else {
+                // Use iterator which handles nulls efficiently
+                let mut iter1 = array.iter();
+                let mut iter2 = array.iter().skip(1);
+
+                for (i, (v1, v2)) in iter1.zip(iter2).enumerate() {
+                    if equal_results[i] {
+                        equal_results[i] = v1 == v2;
+                    }
+                }
+            }
+        } else {
+            // NOT NULL column - direct value comparison
+            for i in 0..equal_results.len() {
+                if equal_results[i] {
+                    equal_results[i] = array.value(i) == array.value(i + 1);
+                }
+            }
+        }
+    }
+}
+
+/// Comparator for ByteView types (Utf8View, BinaryView).
+///
+/// ByteView arrays store short strings (<=12 bytes) inline, allowing fast comparison
+/// of the view value before comparing full string data.
+pub struct ByteViewComparator<T: ByteViewType + Send + Sync, const NULLABLE: bool> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType + Send + Sync, const NULLABLE: bool> ByteViewComparator<T, NULLABLE> {
+    pub fn new() -> Self {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T: ByteViewType + Send + Sync, const NULLABLE: bool> ColumnComparator
+    for ByteViewComparator<T, NULLABLE>
+where
+    T::Native: PartialEq,
+{
+    #[inline]
+    fn compare_adjacent(&self, col: &ArrayRef, equal_results: &mut [bool]) {
+        let array = col.as_byte_view::<T>();
+
+        if NULLABLE {
+            if array.null_count() == 0 {
+                // Fast path: no nulls
+                for i in 0..equal_results.len() {
+                    if equal_results[i] {
+                        equal_results[i] = array.value(i) == array.value(i + 1);
+                    }
+                }
+            } else {
+                // Handle nulls via iterator
+                let mut iter1 = array.iter();
+                let mut iter2 = array.iter().skip(1);
+
+                for (i, (v1, v2)) in iter1.zip(iter2).enumerate() {
+                    if equal_results[i] {
+                        equal_results[i] = v1 == v2;
+                    }
+                }
+            }
+        } else {
+            // NOT NULL column
+            for i in 0..equal_results.len() {
+                if equal_results[i] {
+                    equal_results[i] = array.value(i) == array.value(i + 1);
+                }
+            }
+        }
+    }
+}
+
+/// Instantiate a primitive comparator and push it into the vector.
+///
+/// Handles const generic NULLABLE parameter based on field nullability.
+#[macro_export]
+macro_rules! instantiate_primitive_comparator {
+    ($v:expr, $nullable:expr, $t:ty) => {
+        if $nullable {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::PrimitiveComparator::<
+                    $t,
+                    true,
+                >::new(),
+            ) as _)
+        } else {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::PrimitiveComparator::<
+                    $t,
+                    false,
+                >::new(),
+            ) as _)
+        }
+    };
+}
+
+/// Instantiate a byte array comparator and push it into the vector.
+#[macro_export]
+macro_rules! instantiate_byte_array_comparator {
+    ($v:expr, $nullable:expr, $t:ty) => {
+        if $nullable {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::ByteArrayComparator::<
+                    $t,
+                    true,
+                >::new(),
+            ) as _)
+        } else {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::ByteArrayComparator::<
+                    $t,
+                    false,
+                >::new(),
+            ) as _)
+        }
+    };
+}
+
+/// Instantiate a byte view comparator and push it into the vector.
+#[macro_export]
+macro_rules! instantiate_byte_view_comparator {
+    ($v:expr, $nullable:expr, $t:ty) => {
+        if $nullable {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::ByteViewComparator::<
+                    $t,
+                    true,
+                >::new(),
+            ) as _)
+        } else {
+            $v.push(Box::new(
+                $crate::queryplanner::inline_aggregate::column_comparator::ByteViewComparator::<
+                    $t,
+                    false,
+                >::new(),
+            ) as _)
+        }
+    };
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
index 6f78a7ce9b375..fca0f1d38019b 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
@@ -1,3 +1,4 @@
+mod column_comparator;
 mod inline_aggregate_stream;
 mod sorted_group_values;
 use crate::cluster::{

From 6ae25f6f2f57c6acfc2697530f91a4d82a8bddbe Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Mon, 20 Oct 2025 16:59:27 +0200
Subject: [PATCH 4/9] in work

---
 .../inline_aggregate/sorted_group_values.rs   | 104 ++++++++++++------
 1 file changed, 73 insertions(+), 31 deletions(-)

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index 47ea718a804d5..f757544a8edb5 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -5,11 +5,12 @@ use std::mem::{self, size_of};
 use datafusion::arrow::array::{Array, ArrayRef, RecordBatch};
 use datafusion::arrow::compute::cast;
 use datafusion::arrow::datatypes::{
-    BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, Float64Type,
-    Int16Type, Int32Type, Int64Type, Int8Type, Schema, SchemaRef, StringViewType,
-    Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit,
-    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
-    TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type,
+    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type,
+    Schema, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType,
+    Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType,
+    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type,
+    UInt32Type, UInt64Type, UInt8Type, Utf8Type,
 };
 use datafusion::dfschema::internal_err;
 use datafusion::dfschema::not_impl_err;
@@ -19,11 +20,22 @@ use datafusion::physical_plan::aggregates::group_values::multi_group_by::{
     ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder,
 };
 
+use crate::queryplanner::inline_aggregate::column_comparator::ColumnComparator;
+use crate::{
+    instantiate_byte_array_comparator, instantiate_byte_view_comparator,
+    instantiate_primitive_comparator,
+};
+
 pub struct SortedGroupValues {
     /// The output schema
     schema: SchemaRef,
+    /// Group value builders for each grouping column
     group_values: Vec<Box<dyn GroupColumn>>,
+    /// Column comparators for detecting group boundaries
+    comparators: Vec<Box<dyn ColumnComparator>>,
+    /// Reusable buffer for row indices (not currently used)
     rows_inds: Vec<usize>,
+    /// Reusable buffer for equality comparison results
     equal_to_results: Vec<bool>,
 }
 
@@ -51,6 +63,7 @@ impl SortedGroupValues {
         Ok(Self {
             schema,
             group_values: vec![],
+            comparators: vec![],
             rows_inds: vec![],
             equal_to_results: vec![],
         })
@@ -59,77 +72,98 @@ impl SortedGroupValues {
     pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
         if self.group_values.is_empty() {
             let mut v = Vec::with_capacity(cols.len());
+            let mut comparators = Vec::with_capacity(cols.len());
 
             for f in self.schema.fields().iter() {
                 let nullable = f.is_nullable();
                 let data_type = f.data_type();
                 match data_type {
                     &DataType::Int8 => {
-                        instantiate_primitive!(v, nullable, Int8Type, data_type)
+                        instantiate_primitive!(v, nullable, Int8Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Int8Type);
                     }
                     &DataType::Int16 => {
-                        instantiate_primitive!(v, nullable, Int16Type, data_type)
+                        instantiate_primitive!(v, nullable, Int16Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Int16Type);
                     }
                     &DataType::Int32 => {
-                        instantiate_primitive!(v, nullable, Int32Type, data_type)
+                        instantiate_primitive!(v, nullable, Int32Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Int32Type);
                     }
                     &DataType::Int64 => {
-                        instantiate_primitive!(v, nullable, Int64Type, data_type)
+                        instantiate_primitive!(v, nullable, Int64Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Int64Type);
                     }
                     &DataType::UInt8 => {
-                        instantiate_primitive!(v, nullable, UInt8Type, data_type)
+                        instantiate_primitive!(v, nullable, UInt8Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, UInt8Type);
                     }
                     &DataType::UInt16 => {
-                        instantiate_primitive!(v, nullable, UInt16Type, data_type)
+                        instantiate_primitive!(v, nullable, UInt16Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, UInt16Type);
                     }
                     &DataType::UInt32 => {
-                        instantiate_primitive!(v, nullable, UInt32Type, data_type)
+                        instantiate_primitive!(v, nullable, UInt32Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, UInt32Type);
                     }
                     &DataType::UInt64 => {
-                        instantiate_primitive!(v, nullable, UInt64Type, data_type)
+                        instantiate_primitive!(v, nullable, UInt64Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, UInt64Type);
                     }
                     &DataType::Float32 => {
-                        instantiate_primitive!(v, nullable, Float32Type, data_type)
+                        instantiate_primitive!(v, nullable, Float32Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Float32Type);
                     }
                     &DataType::Float64 => {
-                        instantiate_primitive!(v, nullable, Float64Type, data_type)
+                        instantiate_primitive!(v, nullable, Float64Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Float64Type);
                     }
                     &DataType::Date32 => {
-                        instantiate_primitive!(v, nullable, Date32Type, data_type)
+                        instantiate_primitive!(v, nullable, Date32Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Date32Type);
                     }
                     &DataType::Date64 => {
-                        instantiate_primitive!(v, nullable, Date64Type, data_type)
+                        instantiate_primitive!(v, nullable, Date64Type, data_type);
+                        instantiate_primitive_comparator!(comparators, nullable, Date64Type);
                     }
                     &DataType::Time32(t) => match t {
                         TimeUnit::Second => {
-                            instantiate_primitive!(v, nullable, Time32SecondType, data_type)
+                            instantiate_primitive!(v, nullable, Time32SecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, Time32SecondType);
                         }
                         TimeUnit::Millisecond => {
-                            instantiate_primitive!(v, nullable, Time32MillisecondType, data_type)
+                            instantiate_primitive!(v, nullable, Time32MillisecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, Time32MillisecondType);
                         }
                         _ => {}
                     },
                     &DataType::Time64(t) => match t {
                         TimeUnit::Microsecond => {
-                            instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type)
+                            instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, Time64MicrosecondType);
                         }
                         TimeUnit::Nanosecond => {
-                            instantiate_primitive!(v, nullable, Time64NanosecondType, data_type)
+                            instantiate_primitive!(v, nullable, Time64NanosecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, Time64NanosecondType);
                         }
                         _ => {}
                     },
                     &DataType::Timestamp(t, _) => match t {
                         TimeUnit::Second => {
-                            instantiate_primitive!(v, nullable, TimestampSecondType, data_type)
+                            instantiate_primitive!(v, nullable, TimestampSecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, TimestampSecondType);
                         }
                         TimeUnit::Millisecond => {
-                            instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type)
+                            instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, TimestampMillisecondType);
                         }
                         TimeUnit::Microsecond => {
-                            instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type)
+                            instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, TimestampMicrosecondType);
                         }
                         TimeUnit::Nanosecond => {
-                            instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type)
+                            instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type);
+                            instantiate_primitive_comparator!(comparators, nullable, TimestampNanosecondType);
                         }
                     },
                     &DataType::Decimal128(_, _) => {
@@ -139,35 +173,43 @@ impl SortedGroupValues {
                             Decimal128Type,
                             data_type
                         }
+                        instantiate_primitive_comparator!(comparators, nullable, Decimal128Type);
                     }
                     &DataType::Utf8 => {
                         let b = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_array_comparator!(comparators, nullable, Utf8Type);
                     }
                     &DataType::LargeUtf8 => {
                         let b = ByteGroupValueBuilder::<i64>::new(OutputType::Utf8);
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_array_comparator!(comparators, nullable, LargeUtf8Type);
                     }
                     &DataType::Binary => {
                         let b = ByteGroupValueBuilder::<i32>::new(OutputType::Binary);
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_array_comparator!(comparators, nullable, BinaryType);
                     }
                     &DataType::LargeBinary => {
                         let b = ByteGroupValueBuilder::<i64>::new(OutputType::Binary);
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_array_comparator!(comparators, nullable, LargeBinaryType);
                     }
                     &DataType::Utf8View => {
                         let b = ByteViewGroupValueBuilder::<StringViewType>::new();
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_view_comparator!(comparators, nullable, StringViewType);
                     }
                     &DataType::BinaryView => {
                         let b = ByteViewGroupValueBuilder::<BinaryViewType>::new();
-                        v.push(Box::new(b) as _)
+                        v.push(Box::new(b) as _);
+                        instantiate_byte_view_comparator!(comparators, nullable, BinaryViewType);
                     }
                     dt => return not_impl_err!("{dt} not supported in SortedGroupValues"),
                 }
             }
             self.group_values = v;
+            self.comparators = comparators;
         }
         self.intern_impl(cols, groups)
     }

From 2ef63e94bc695220edfd6fec23c5568918171370 Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Mon, 20 Oct 2025 17:18:42 +0200
Subject: [PATCH 5/9] in work

---
 .../inline_aggregate/sorted_group_values.rs   | 126 ++++++++++++------
 1 file changed, 86 insertions(+), 40 deletions(-)

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index f757544a8edb5..d47c47381b7b6 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -1,3 +1,4 @@
+use datafusion::logical_expr::EmitTo;
 use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
 
 use std::mem::{self, size_of};
@@ -6,15 +7,15 @@ use datafusion::arrow::array::{Array, ArrayRef, RecordBatch};
 use datafusion::arrow::compute::cast;
 use datafusion::arrow::datatypes::{
     BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type,
-    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type,
-    Schema, SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType,
-    Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType,
-    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type,
-    UInt32Type, UInt64Type, UInt8Type, Utf8Type,
+    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, Schema,
+    SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
+    Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
+    TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    Utf8Type,
 };
 use datafusion::dfschema::internal_err;
 use datafusion::dfschema::not_impl_err;
-use datafusion::error::Result as DFResult;
+use datafusion::error::{DataFusionError, Result as DFResult};
 use datafusion::physical_expr::binary_map::OutputType;
 use datafusion::physical_plan::aggregates::group_values::multi_group_by::{
     ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder,
@@ -129,41 +130,83 @@ impl SortedGroupValues {
                     &DataType::Time32(t) => match t {
                         TimeUnit::Second => {
                             instantiate_primitive!(v, nullable, Time32SecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, Time32SecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                Time32SecondType
+                            );
                         }
                         TimeUnit::Millisecond => {
                             instantiate_primitive!(v, nullable, Time32MillisecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, Time32MillisecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                Time32MillisecondType
+                            );
                         }
                         _ => {}
                     },
                     &DataType::Time64(t) => match t {
                         TimeUnit::Microsecond => {
                             instantiate_primitive!(v, nullable, Time64MicrosecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, Time64MicrosecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                Time64MicrosecondType
+                            );
                         }
                         TimeUnit::Nanosecond => {
                             instantiate_primitive!(v, nullable, Time64NanosecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, Time64NanosecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                Time64NanosecondType
+                            );
                         }
                         _ => {}
                     },
                     &DataType::Timestamp(t, _) => match t {
                         TimeUnit::Second => {
                             instantiate_primitive!(v, nullable, TimestampSecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, TimestampSecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                TimestampSecondType
+                            );
                         }
                         TimeUnit::Millisecond => {
-                            instantiate_primitive!(v, nullable, TimestampMillisecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, TimestampMillisecondType);
+                            instantiate_primitive!(
+                                v,
+                                nullable,
+                                TimestampMillisecondType,
+                                data_type
+                            );
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                TimestampMillisecondType
+                            );
                         }
                         TimeUnit::Microsecond => {
-                            instantiate_primitive!(v, nullable, TimestampMicrosecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, TimestampMicrosecondType);
+                            instantiate_primitive!(
+                                v,
+                                nullable,
+                                TimestampMicrosecondType,
+                                data_type
+                            );
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                TimestampMicrosecondType
+                            );
                         }
                         TimeUnit::Nanosecond => {
                             instantiate_primitive!(v, nullable, TimestampNanosecondType, data_type);
-                            instantiate_primitive_comparator!(comparators, nullable, TimestampNanosecondType);
+                            instantiate_primitive_comparator!(
+                                comparators,
+                                nullable,
+                                TimestampNanosecondType
+                            );
                         }
                     },
                     &DataType::Decimal128(_, _) => {
@@ -231,8 +274,8 @@ impl SortedGroupValues {
         self.group_values[0].len()
     }
 
-    pub fn emit(&mut self) -> DFResult<Vec<ArrayRef>> {
-        /* let mut output = match emit_to {
+    fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
+        let mut output = match emit_to {
             EmitTo::All => {
                 let group_values = mem::take(&mut self.group_values);
                 debug_assert!(self.group_values.is_empty());
@@ -253,7 +296,6 @@ impl SortedGroupValues {
             }
         };
 
-        // TODO: Materialize dictionaries in group keys (#7647)
         for (field, array) in self.schema.fields.iter().zip(&mut output) {
             let expected = field.data_type();
             if let DataType::Dictionary(_, v) = expected {
@@ -267,24 +309,25 @@ impl SortedGroupValues {
             }
         }
 
-        Ok(output) */
-        todo!()
+        Ok(output)
     }
 
     fn clear_shrink(&mut self, batch: &RecordBatch) {
         self.group_values.clear();
+        self.comparators.clear();
         self.rows_inds.clear();
         self.equal_to_results.clear();
     }
 
     fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
-        /* let n_rows = cols[0].len();
+        let n_rows = cols[0].len();
         groups.clear();
 
         if n_rows == 0 {
             return Ok(());
         }
 
+        // Handle first row - compare with last group or create new group
         let first_group_idx = self.make_new_group_if_needed(cols, 0);
         groups.push(first_group_idx);
 
@@ -292,28 +335,20 @@ impl SortedGroupValues {
             return Ok(());
         }
 
-        if self.rows_inds.len() < n_rows {
-            let old_len = self.rows_inds.len();
-            self.rows_inds.extend(old_len..n_rows);
-        }
-
-        self.equal_to_results.fill(true);
+        // Prepare buffer for vectorized comparison
         self.equal_to_results.resize(n_rows - 1, true);
+        self.equal_to_results[..n_rows - 1].fill(true);
 
-        let lhs_rows = &self.rows_inds[0..n_rows - 1];
-        let rhs_rows = &self.rows_inds[1..n_rows];
-        for (col_idx, group_col) in self.group_values.iter().enumerate() {
-            cols[col_idx].vectorized_equal_to(
-                lhs_rows,
-                &cols[col_idx],
-                rhs_rows,
-                &mut self.equal_to_results,
-            );
+        // Vectorized comparison: compare row[i] with row[i+1] for all columns
+        for (col, comparator) in cols.iter().zip(&self.comparators) {
+            comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]);
         }
-        println!("!!!!! AAAAAAAAAA");
+
+        // Build groups based on comparison results
         let mut current_group_idx = first_group_idx;
         for i in 0..n_rows - 1 {
             if !self.equal_to_results[i] {
+                // Group boundary detected - add new group
                 for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
                     group_value.append_val(&cols[col_idx], i + 1);
                 }
@@ -321,24 +356,35 @@ impl SortedGroupValues {
             }
             groups.push(current_group_idx);
         }
-        println!("!!!!! BBBBBBB");
-        Ok(()) */
+
         Ok(())
     }
 
+    /// Compare the specified row with the last group and create a new group if different.
+    ///
+    /// This is used to handle the first row of a batch, which needs to be compared
+    /// with the last group from the previous batch to detect group boundaries across batches.
+    ///
+    /// Returns the group index for this row.
     fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
         let new_group_needed = if self.group_values[0].len() == 0 {
+            // No groups yet - always create first group
             true
         } else {
+            // Compare with last group - if any column differs, need new group
             self.group_values.iter().enumerate().any(|(i, group_val)| {
                 !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
             })
         };
+
         if new_group_needed {
+            // Add new group with values from this row
             for (i, group_value) in self.group_values.iter_mut().enumerate() {
                 group_value.append_val(&cols[i], row);
             }
         }
+
+        // Return index of the group (either newly created or existing last group)
         self.group_values[0].len() - 1
     }
 }

From db87d629a06ef5ec3c62dcd2591e7491598c913d Mon Sep 17 00:00:00 2001
From: Alexandr Romanenko <alex.romanenko@cube.dev>
Date: Mon, 20 Oct 2025 19:59:08 +0200
Subject: [PATCH 6/9] in work

---
 .../inline_aggregate_stream.rs                | 299 +++++++++---------
 .../src/queryplanner/inline_aggregate/mod.rs  |  39 ++-
 .../inline_aggregate/sorted_group_values.rs   |   2 +-
 .../inline_aggregate_rewriter.rs              |  30 ++
 .../src/queryplanner/optimizations/mod.rs     |   6 +
 .../src/queryplanner/pretty_printers.rs       |  13 +
 6 files changed, 226 insertions(+), 163 deletions(-)
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
index 4da0b9b48f7b0..fc29dc584b45d 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
@@ -7,6 +7,7 @@ use crate::metastore::multi_index::MultiPartition;
 use crate::metastore::table::Table;
 use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
 use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
+use crate::queryplanner::inline_aggregate::sorted_group_values::SortedGroupValues;
 use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
 use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
 use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
@@ -50,8 +51,8 @@ use datafusion::dfschema::internal_err;
 use datafusion::dfschema::not_impl_err;
 use datafusion::error::DataFusionError;
 use datafusion::error::Result as DFResult;
-use datafusion::execution::TaskContext;
-use datafusion::logical_expr::{Expr, GroupsAccumulator, LogicalPlan};
+use datafusion::execution::{RecordBatchStream, TaskContext};
+use datafusion::logical_expr::{EmitTo, Expr, GroupsAccumulator, LogicalPlan};
 use datafusion::physical_expr::expressions::Column as DFColumn;
 use datafusion::physical_expr::LexOrdering;
 use datafusion::physical_expr::{self, GroupsAccumulatorAdapter};
@@ -135,6 +136,7 @@ pub(crate) struct InlineAggregateStream {
     input_done: bool,
 
     accumulators: Vec<Box<dyn GroupsAccumulator>>,
+    group_values: SortedGroupValues,
     current_group_indices: Vec<usize>,
 }
 
@@ -189,6 +191,7 @@ impl InlineAggregateStream {
 
         let exec_state = ExecutionState::ReadingInput;
         let current_group_indices = Vec::with_capacity(batch_size);
+        let group_values = SortedGroupValues::try_new(group_schema)?;
 
         Ok(InlineAggregateStream {
             schema: agg_schema,
@@ -201,6 +204,7 @@ impl InlineAggregateStream {
             exec_state,
             batch_size,
             current_group_indices,
+            group_values,
             input_done: false,
         })
     }
@@ -303,144 +307,68 @@ impl Stream for InlineAggregateStream {
     ) -> Poll<Option<Self::Item>> {
         loop {
             match &self.exec_state {
-                ExecutionState::ReadingInput => 'reading_input: {
+                ExecutionState::ReadingInput => {
                     match ready!(self.input.poll_next_unpin(cx)) {
-                        // New batch to aggregate in partial aggregation operator
-                        Some(Ok(batch)) if self.mode == InlineAggregateMode::Partial => {
-                            /* let timer = elapsed_compute.timer();
-                            let input_rows = batch.num_rows();
-
-                            // Do the grouping
-                            self.group_aggregate_batch(batch)?;
-
-                            self.update_skip_aggregation_probe(input_rows);
-
-                            // If we can begin emitting rows, do so,
-                            // otherwise keep consuming input
-                            assert!(!self.input_done);
-
-                            // If the number of group values equals or exceeds the soft limit,
-                            // emit all groups and switch to producing output
-                            if self.hit_soft_group_limit() {
-                                timer.done();
-                                self.set_input_done_and_produce_output()?;
-                                // make sure the exec_state just set is not overwritten below
-                                break 'reading_input;
-                            }
-
-                            if let Some(to_emit) = self.group_ordering.emit_to() {
-                                timer.done();
-                                if let Some(batch) = self.emit(to_emit, false)? {
-
-                                        ExecutionState::ProducingOutput(batch);
-                                };
-                                // make sure the exec_state just set is not overwritten below
-                                break 'reading_input;
-                            }
-
-                            self.emit_early_if_necessary()?;
-
-                            self.switch_to_skip_aggregation()?;
-
-                            timer.done(); */
-                            todo!()
-                        }
-
-                        // New batch to aggregate in terminal aggregation operator
-                        // (Final/FinalPartitioned/Single/SinglePartitioned)
+                        // New input batch to aggregate
                         Some(Ok(batch)) => {
-                            /* let timer = elapsed_compute.timer();
-
-                            // Make sure we have enough capacity for `batch`, otherwise spill
-                            self.spill_previous_if_necessary(&batch)?;
-
-                            // Do the grouping
-
-
-                            // If we can begin emitting rows, do so,
-                            // otherwise keep consuming input
-                            assert!(!self.input_done);
-
-                            // If the number of group values equals or exceeds the soft limit,
-                            // emit all groups and switch to producing output
-                            if self.hit_soft_group_limit() {
-                                timer.done();
-                                self.set_input_done_and_produce_output()?;
-                                // make sure the exec_state just set is not overwritten below
-                                break 'reading_input;
+                            // Aggregate the batch
+                            if let Err(e) = self.group_aggregate_batch(batch) {
+                                return Poll::Ready(Some(Err(e)));
                             }
 
-                            if let Some(to_emit) = self.group_ordering.emit_to() {
-                                timer.done();
-                                if let Some(batch) = self.emit(to_emit, false)? {
-                                    self.exec_state =
-                                        ExecutionState::ProducingOutput(batch);
-                                };
-                                // make sure the exec_state just set is not overwritten below
-                                break 'reading_input;
+                            // Try to emit a batch if we have enough groups
+                            match self.emit_early_if_ready() {
+                                Ok(Some(batch)) => {
+                                    self.exec_state = ExecutionState::ProducingOutput(batch);
+                                }
+                                Ok(None) => {
+                                    // Not enough groups yet, continue reading
+                                }
+                                Err(e) => {
+                                    return Poll::Ready(Some(Err(e)));
+                                }
                             }
-
-                            timer.done(); */
-                            todo!()
                         }
 
-                        // Found error from input stream
+                        // Error from input stream
                         Some(Err(e)) => {
-                            // inner had error, return to caller
                             return Poll::Ready(Some(Err(e)));
                         }
 
-                        // Found end from input stream
+                        // Input stream exhausted - emit all remaining groups
                         None => {
-                            // inner is done, emit all rows and switch to producing output
-                            //self.set_input_done_and_produce_output()?;
-                            todo!()
+                            self.input_done = true;
+
+                            match self.emit(EmitTo::All) {
+                                Ok(Some(batch)) => {
+                                    self.exec_state = ExecutionState::ProducingOutput(batch);
+                                }
+                                Ok(None) => {
+                                    // No groups to emit, we're done
+                                    self.exec_state = ExecutionState::Done;
+                                }
+                                Err(e) => {
+                                    return Poll::Ready(Some(Err(e)));
+                                }
+                            }
                         }
                     }
                 }
 
                 ExecutionState::ProducingOutput(batch) => {
-                    // slice off a part of the batch, if needed
-                    /* let output_batch;
-                    let size = self.batch_size;
-                    (self.exec_state, output_batch) = if batch.num_rows() <= size {
-                        (
-                            if self.input_done {
-                                ExecutionState::Done
-                            }
-                            // In Partial aggregation, we also need to check
-                            // if we should trigger partial skipping
-                            else if self.mode == AggregateMode::Partial
-                                && self.should_skip_aggregation()
-                            {
-                                ExecutionState::SkippingAggregation
-                            } else {
-                                ExecutionState::ReadingInput
-                            },
-                            batch.clone(),
-                        )
+                    let batch = batch.clone();
+
+                    // Determine next state
+                    self.exec_state = if self.input_done {
+                        ExecutionState::Done
                     } else {
-                        // output first batch_size rows
-                        let size = self.batch_size;
-                        let num_remaining = batch.num_rows() - size;
-                        let remaining = batch.slice(size, num_remaining);
-                        let output = batch.slice(0, size);
-                        (ExecutionState::ProducingOutput(remaining), output)
+                        ExecutionState::ReadingInput
                     };
-                    // Empty record batches should not be emitted.
-                    // They need to be treated as  [`Option<RecordBatch>`]es and handled separately
-                    debug_assert!(output_batch.num_rows() > 0);
-                    return Poll::Ready(Some(Ok(
-                        output_batch.record_output(&self.baseline_metrics)
-                    ))); */
-                    todo!()
+
+                    return Poll::Ready(Some(Ok(batch)));
                 }
 
                 ExecutionState::Done => {
-                    // release the memory reservation since sending back output batch itself needs
-                    // some memory reservation, so make some room for it.
-                    /* self.clear_all();
-                    let _ = self.update_memory_reservation(); */
                     return Poll::Ready(None);
                 }
             }
@@ -449,9 +377,69 @@ impl Stream for InlineAggregateStream {
 }
 
 impl InlineAggregateStream {
+    /// Emit groups based on EmitTo strategy.
+    ///
+    /// Returns None if there are no groups to emit.
+    /// Emit groups based on EmitTo strategy.
+    ///
+    /// Returns None if there are no groups to emit.
+    fn emit(&mut self, emit_to: EmitTo) -> DFResult<Option<RecordBatch>> {
+        if self.group_values.is_empty() {
+            return Ok(None);
+        }
+
+        // Get group values arrays
+        let group_arrays = self.group_values.emit(emit_to)?;
+
+        // Get aggregate arrays based on mode
+        let mut aggr_arrays = vec![];
+        for acc in &mut self.accumulators {
+            match self.mode {
+                InlineAggregateMode::Partial => {
+                    // Emit intermediate state
+                    let state = acc.state(emit_to)?;
+                    aggr_arrays.extend(state);
+                }
+                InlineAggregateMode::Final => {
+                    // Emit final aggregated values
+                    aggr_arrays.push(acc.evaluate(emit_to)?);
+                }
+            }
+        }
+
+        // Combine group columns and aggregate columns
+        let mut columns = group_arrays;
+        columns.extend(aggr_arrays);
+
+        let batch = RecordBatch::try_new(Arc::clone(&self.schema), columns)?;
+
+        Ok(Some(batch))
+    }
+
+    /// Check if we have enough groups to emit a batch, keeping the last (potentially incomplete) group.
+    ///
+    /// For sorted aggregation, we emit batches of size batch_size when we have accumulated
+    /// more than batch_size groups. We always keep the last group as it may continue in the next input batch.
+    fn should_emit_early(&self) -> bool {
+        // Need at least (batch_size + 1) groups to emit batch_size and keep 1
+        self.group_values.len() > self.batch_size
+    }
+
+    /// Emit a batch of groups if we have enough accumulated, keeping the last group.
+    ///
+    /// Returns Some(batch) if emitted, None otherwise.
+    fn emit_early_if_ready(&mut self) -> DFResult<Option<RecordBatch>> {
+        if !self.should_emit_early() {
+            return Ok(None);
+        }
+
+        // Emit exactly batch_size groups, keeping the rest (including last incomplete group)
+        self.emit(EmitTo::First(self.batch_size))
+    }
+
     fn group_aggregate_batch(&mut self, batch: RecordBatch) -> DFResult<()> {
         // Evaluate the grouping expressions
-        /* let group_by_values = evaluate_group_by(&self.group_by, &batch)?;
+        let group_by_values = evaluate_group_by(&self.group_by, &batch)?;
 
         // Evaluate the aggregation expressions.
         let input_values = evaluate_many(&self.aggregate_arguments, &batch)?;
@@ -459,48 +447,39 @@ impl InlineAggregateStream {
         // Evaluate the filter expressions, if any, against the inputs
         let filter_values = evaluate_optional(&self.filter_expressions, &batch)?;
 
-        for group_values in &group_by_values {
-            // calculate the group indices for each input row
-            let starting_num_groups = self.group_values.len();
-            self.group_values
-                .intern(group_values, &mut self.current_group_indices)?;
-            let group_indices = &self.current_group_indices;
-
-            // Update ordering information if necessary
-            /* let total_num_groups = self.group_values.len();
-            if total_num_groups > starting_num_groups {
-                self.group_ordering
-                    .new_groups(group_values, group_indices, total_num_groups)?;
-            } */
-
-            // Gather the inputs to call the actual accumulator
-            let t = self
-                .accumulators
-                .iter_mut()
-                .zip(input_values.iter())
-                .zip(filter_values.iter());
-
-            for ((acc, values), opt_filter) in t {
-                let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean());
-
-                // Call the appropriate method on each aggregator with
-                // the entire input row and the relevant group indexes
-                match self.mode {
-                    InlineAggregateMode::Partial => {
-                        acc.update_batch(values, group_indices, opt_filter, total_num_groups)?;
+        assert_eq!(group_by_values.len(), 1, "Exactly 1 group value required");
+        self.group_values
+            .intern(&group_by_values[0], &mut self.current_group_indices)?;
+        let group_indices = &self.current_group_indices;
+
+        let total_num_groups = self.group_values.len();
+        // Gather the inputs to call the actual accumulator
+        let t = self
+            .accumulators
+            .iter_mut()
+            .zip(input_values.iter())
+            .zip(filter_values.iter());
+
+        for ((acc, values), opt_filter) in t {
+            let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean());
+
+            // Call the appropriate method on each aggregator with
+            // the entire input row and the relevant group indexes
+            match self.mode {
+                InlineAggregateMode::Partial => {
+                    acc.update_batch(values, group_indices, opt_filter, total_num_groups)?;
+                }
+                _ => {
+                    if opt_filter.is_some() {
+                        return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage");
                     }
-                    _ => {
-                        if opt_filter.is_some() {
-                            return internal_err!("aggregate filter should be applied in partial stage, there should be no filter in final stage");
-                        }
 
-                        // if aggregation is over intermediate states,
-                        // use merge
-                        acc.merge_batch(values, group_indices, None, total_num_groups)?;
-                    }
+                    // if aggregation is over intermediate states,
+                    // use merge
+                    acc.merge_batch(values, group_indices, None, total_num_groups)?;
                 }
             }
-        } */
+        }
         Ok(())
     }
 }
@@ -609,3 +588,9 @@ fn evaluate_group_by(
         })
         .collect()
 }
+
+impl RecordBatchStream for InlineAggregateStream {
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
index fca0f1d38019b..208873256afae 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
@@ -131,19 +131,33 @@ pub struct InlineAggregateExec {
     /// expressions from protobuf for final aggregate.
     pub input_schema: SchemaRef,
     cache: PlanProperties,
+    required_input_ordering: Vec<Option<LexRequirement>>,
 }
 
 impl InlineAggregateExec {
+    /// Try to create an InlineAggregateExec from a standard AggregateExec.
+    ///
+    /// Returns None if the aggregate cannot be converted (e.g., not sorted, uses grouping sets).
     pub fn try_new_from_aggregate(aggregate: &AggregateExec) -> Option<Self> {
-        if matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) {
+        // Only convert Sorted aggregates
+        if !matches!(aggregate.input_order_mode(), InputOrderMode::Sorted) {
             return None;
         }
+
+        // Only support Partial and Final modes
         let mode = match aggregate.mode() {
             AggregateMode::Partial => InlineAggregateMode::Partial,
             AggregateMode::Final => InlineAggregateMode::Final,
             _ => return None,
         };
+
         let group_by = aggregate.group_expr().clone();
+
+        // InlineAggregate doesn't support grouping sets (CUBE/ROLLUP/GROUPING SETS)
+        if !group_by.is_single() {
+            return None;
+        }
+
         let aggr_expr = aggregate.aggr_expr().iter().cloned().collect();
         let filter_expr = aggregate.filter_expr().iter().cloned().collect();
         let limit = aggregate.limit().clone();
@@ -151,6 +165,8 @@ impl InlineAggregateExec {
         let schema = aggregate.schema().clone();
         let input_schema = aggregate.input_schema().clone();
         let cache = aggregate.cache().clone();
+        let required_input_ordering = aggregate.required_input_ordering().clone();
+
         Some(Self {
             mode,
             group_by,
@@ -161,9 +177,22 @@ impl InlineAggregateExec {
             schema,
             input_schema,
             cache,
+            required_input_ordering,
         })
     }
 
+    pub fn mode(&self) -> &InlineAggregateMode {
+        &self.mode
+    }
+
+    pub fn limit(&self) -> Option<usize> {
+        self.limit
+    }
+
+    pub fn aggr_expr(&self) -> &[Arc<AggregateFunctionExpr>] {
+        &self.aggr_expr
+    }
+
     pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
         &self.input
     }
@@ -206,7 +235,7 @@ impl ExecutionPlan for InlineAggregateExec {
     }
 
     fn required_input_ordering(&self) -> Vec<Option<LexRequirement>> {
-        vec![]
+        self.required_input_ordering.clone()
     }
 
     fn maintains_input_order(&self) -> Vec<bool> {
@@ -231,6 +260,7 @@ impl ExecutionPlan for InlineAggregateExec {
             schema: self.schema.clone(),
             input_schema: self.input_schema.clone(),
             cache: self.cache.clone(),
+            required_input_ordering: self.required_input_ordering.clone(),
         };
         Ok(Arc::new(result))
     }
@@ -240,9 +270,8 @@ impl ExecutionPlan for InlineAggregateExec {
         partition: usize,
         context: Arc<TaskContext>,
     ) -> DFResult<SendableRecordBatchStream> {
-        /* self.execute_typed(partition, context)
-        .map(|stream| stream.into()) */
-        todo!()
+        let stream = inline_aggregate_stream::InlineAggregateStream::new(self, context, partition)?;
+        Ok(Box::pin(stream))
     }
 
     fn metrics(&self) -> Option<MetricsSet> {
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index d47c47381b7b6..f53331b45c5e9 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -274,7 +274,7 @@ impl SortedGroupValues {
         self.group_values[0].len()
     }
 
-    fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
+    pub fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
         let mut output = match emit_to {
             EmitTo::All => {
                 let group_values = mem::take(&mut self.group_values);
diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs
new file mode 100644
index 0000000000000..3a81303a249f6
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/inline_aggregate_rewriter.rs
@@ -0,0 +1,30 @@
+use crate::queryplanner::inline_aggregate::InlineAggregateExec;
+use datafusion::error::DataFusionError;
+use datafusion::physical_plan::aggregates::AggregateExec;
+use datafusion::physical_plan::ExecutionPlan;
+use std::sync::Arc;
+
+/// Replace sorted AggregateExec node with InlineAggregateExec if possible.
+///
+/// This is a single-node rewriter function designed to be used with `rewrite_physical_plan`.
+/// It replaces standard hash-based aggregates with a more efficient sorted aggregation
+/// implementation when:
+/// - Input is sorted by grouping columns (InputOrderMode::Sorted)
+/// - Mode is Partial or Final
+/// - No grouping sets (CUBE/ROLLUP/GROUPING SETS)
+///
+/// The InlineAggregateExec takes advantage of sorted input to:
+/// - Avoid hash table overhead
+/// - Enable streaming aggregation with bounded memory
+/// - Process groups in order without buffering
+pub fn replace_with_inline_aggregate(
+    plan: Arc<dyn ExecutionPlan>,
+) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
+    if let Some(agg) = plan.as_any().downcast_ref::<AggregateExec>() {
+        if let Some(inline_agg) = InlineAggregateExec::try_new_from_aggregate(agg) {
+            return Ok(Arc::new(inline_agg));
+        }
+    }
+
+    Ok(plan)
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
index 1367301d3aee0..0359e64c476db 100644
--- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
@@ -1,5 +1,6 @@
 mod check_memory;
 mod distributed_partial_aggregate;
+mod inline_aggregate_rewriter;
 pub mod rewrite_plan;
 pub mod rolling_optimizer;
 mod trace_data_loaded;
@@ -10,6 +11,7 @@ use crate::queryplanner::optimizations::distributed_partial_aggregate::{
     add_limit_to_workers, ensure_partition_merge, push_aggregate_to_workers,
     replace_suboptimal_merge_sorts,
 };
+use crate::queryplanner::optimizations::inline_aggregate_rewriter::replace_with_inline_aggregate;
 use crate::queryplanner::planning::CubeExtensionPlanner;
 use crate::queryplanner::pretty_printers::{pp_phys_plan_ext, PPOptions};
 use crate::queryplanner::rolling::RollingWindowPlanner;
@@ -141,6 +143,10 @@ fn pre_optimize_physical_plan(
     let p = rewrite_physical_plan(p, &mut |p| ensure_partition_merge_with_acceptable_parent(p))?;
     // Handles the root node case
     let p = ensure_partition_merge(p)?;
+
+    // Replace sorted AggregateExec with InlineAggregateExec for better performance
+    let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?;
+
     Ok(p)
 }
 
diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
index fac32b4f8e63c..d9c353d1d1095 100644
--- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
@@ -28,6 +28,7 @@ use std::sync::Arc;
 
 use crate::queryplanner::check_memory::CheckMemoryExec;
 use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
+use crate::queryplanner::inline_aggregate::{InlineAggregateExec, InlineAggregateMode};
 use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
 use crate::queryplanner::panic::{PanicWorkerExec, PanicWorkerNode};
 use crate::queryplanner::planning::{ClusterSendNode, Snapshot, WorkerExec};
@@ -604,6 +605,18 @@ fn pp_phys_plan_indented(p: &dyn ExecutionPlan, indent: usize, o: &PPOptions, ou
             if let Some(limit) = agg.limit() {
                 *out += &format!(", limit: {}", limit)
             }
+        } else if let Some(agg) = a.downcast_ref::<InlineAggregateExec>() {
+            let mode = match agg.mode() {
+                InlineAggregateMode::Partial => "Partial",
+                InlineAggregateMode::Final => "Final",
+            };
+            *out += &format!("{}InlineAggregate", mode);
+            if o.show_aggregations {
+                *out += &format!(", aggs: {:?}", agg.aggr_expr())
+            }
+            if let Some(limit) = agg.limit() {
+                *out += &format!(", limit: {}", limit)
+            }
         } else if let Some(l) = a.downcast_ref::<LocalLimitExec>() {
             *out += &format!("LocalLimit, n: {}", l.fetch());
         } else if let Some(l) = a.downcast_ref::<GlobalLimitExec>() {

From 3b17c39b1929d09d81fd83e65e554fa53b4fedd6 Mon Sep 17 00:00:00 2001
From: Aleksandr Romanenko <alex.romanenko@cube.dev>
Date: Thu, 30 Oct 2025 19:48:15 +0100
Subject: [PATCH 7/9] in work

---
 rust/cubestore/Cargo.lock                     |  15 ++
 .../cubestore-sql-tests/src/tests.rs          |   1 -
 rust/cubestore/cubestore/Cargo.toml           |  12 +-
 rust/cubestore/cubestore/src/config/mod.rs    |   6 +-
 rust/cubestore/cubestore/src/import/mod.rs    |  24 +-
 rust/cubestore/cubestore/src/lib.rs           |   2 +-
 .../inline_aggregate/column_comparator.rs     |   1 +
 .../inline_aggregate_stream.rs                |   7 +-
 .../src/queryplanner/inline_aggregate/mod.rs  |  60 +++++
 .../inline_aggregate/sorted_group_values.rs   | 151 ++++++------
 .../sorted_group_values_rows.rs               | 228 ++++++++++++++++++
 .../src/queryplanner/optimizations/mod.rs     |   2 +-
 .../src/queryplanner/query_executor.rs        |   1 -
 rust/cubestore/cubestore/src/sql/mod.rs       |   1 +
 rust/cubestore/rust-toolchain.toml            |   2 +-
 15 files changed, 410 insertions(+), 103 deletions(-)
 create mode 100644 rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs

diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock
index 6a64ae7e5efe8..2bb93b2da9777 100644
--- a/rust/cubestore/Cargo.lock
+++ b/rust/cubestore/Cargo.lock
@@ -219,6 +219,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 [[package]]
 name = "arrow"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -238,6 +239,7 @@ dependencies = [
 [[package]]
 name = "arrow-arith"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -250,6 +252,7 @@ dependencies = [
 [[package]]
 name = "arrow-array"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "ahash 0.8.11",
  "arrow-buffer",
@@ -265,6 +268,7 @@ dependencies = [
 [[package]]
 name = "arrow-buffer"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "bytes 1.10.1",
  "half 2.4.1",
@@ -274,6 +278,7 @@ dependencies = [
 [[package]]
 name = "arrow-cast"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -293,6 +298,7 @@ dependencies = [
 [[package]]
 name = "arrow-csv"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -307,6 +313,7 @@ dependencies = [
 [[package]]
 name = "arrow-data"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -317,6 +324,7 @@ dependencies = [
 [[package]]
 name = "arrow-ipc"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -329,6 +337,7 @@ dependencies = [
 [[package]]
 name = "arrow-json"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -347,6 +356,7 @@ dependencies = [
 [[package]]
 name = "arrow-ord"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -358,6 +368,7 @@ dependencies = [
 [[package]]
 name = "arrow-row"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -369,6 +380,7 @@ dependencies = [
 [[package]]
 name = "arrow-schema"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "serde",
 ]
@@ -376,6 +388,7 @@ dependencies = [
 [[package]]
 name = "arrow-select"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "ahash 0.8.11",
  "arrow-array",
@@ -388,6 +401,7 @@ dependencies = [
 [[package]]
 name = "arrow-string"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4514,6 +4528,7 @@ dependencies = [
 [[package]]
 name = "parquet"
 version = "54.2.1"
+source = "git+https://github.com/cube-js/arrow-rs.git?branch=cube-46.0.1#03cb44c47e39a51826e4bc1f49aeae4c34b02631"
 dependencies = [
  "aes-gcm",
  "ahash 0.8.11",
diff --git a/rust/cubestore/cubestore-sql-tests/src/tests.rs b/rust/cubestore/cubestore-sql-tests/src/tests.rs
index 253c1c170d0c5..7b1a5d1bbff67 100644
--- a/rust/cubestore/cubestore-sql-tests/src/tests.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/tests.rs
@@ -8339,7 +8339,6 @@ async fn assert_limit_pushdown_using_search_string(
         .unwrap();
     match &res.get_rows()[1].values()[2] {
         TableValue::String(s) => {
-            println!("!! plan {}", s);
             if let Some(ind) = expected_index {
                 if s.find(ind).is_none() {
                     return Err(format!(
diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml
index 83834c7a7e827..b7219248c3007 100644
--- a/rust/cubestore/cubestore/Cargo.toml
+++ b/rust/cubestore/cubestore/Cargo.toml
@@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" }
 cubedatasketches = { path = "../cubedatasketches" }
 cubeshared = { path = "../../cubeshared" }
 cuberpc = { path = "../cuberpc" }
-datafusion = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] }
-datafusion-datasource = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" }
-datafusion-proto = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" }
-datafusion-proto-common = { path = "/Users/war/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" }
+datafusion = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] }
+datafusion-datasource = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" }
+datafusion-proto = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" }
+datafusion-proto-common = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" }
 csv = "1.1.3"
 bytes = "1.6.0"
 serde_json = "1.0.56"
@@ -120,8 +120,8 @@ sasl2-sys = { version = "0.1.6", features = ["vendored"] }
 rdkafka = { version = "0.29.0", features = ["cmake-build"] }
 
 [target.'cfg(target_os = "macos")'.dependencies]
-#rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] }
-#sasl2-sys = { version = "0.1.6", features = ["vendored"] }
+rdkafka = { version = "0.29.0", features = ["ssl", "gssapi"] }
+sasl2-sys = { version = "0.1.6", features = ["vendored"] }
 
 [dev-dependencies]
 pretty_assertions = "0.7.1"
diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs
index f378a7e376767..31c6bf4a9458d 100644
--- a/rust/cubestore/cubestore/src/config/mod.rs
+++ b/rust/cubestore/cubestore/src/config/mod.rs
@@ -36,8 +36,8 @@ use crate::sql::{SqlService, SqlServiceImpl};
 use crate::sql::{TableExtensionService, TableExtensionServiceImpl};
 use crate::store::compaction::{CompactionService, CompactionServiceImpl};
 use crate::store::{ChunkDataStore, ChunkStore, WALDataStore, WALStore};
-/* use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl};
-use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl}; */
+use crate::streaming::kafka::{KafkaClientService, KafkaClientServiceImpl};
+use crate::streaming::{KsqlClient, KsqlClientImpl, StreamingService, StreamingServiceImpl};
 use crate::table::parquet::{
     CubestoreMetadataCacheFactory, CubestoreMetadataCacheFactoryImpl,
     CubestoreParquetMetadataCache, CubestoreParquetMetadataCacheImpl,
@@ -2194,7 +2194,7 @@ impl Config {
             .register_typed::<dyn ImportService, _, _, _>(async move |i| {
                 ImportServiceImpl::new(
                     i.get_service_typed().await,
-                    //i.get_service_typed().await,
+                    i.get_service_typed().await,
                     i.get_service_typed().await,
                     i.get_service_typed().await,
                     i.get_service_typed().await,
diff --git a/rust/cubestore/cubestore/src/import/mod.rs b/rust/cubestore/cubestore/src/import/mod.rs
index 8a2c4b811504f..f994aeee54301 100644
--- a/rust/cubestore/cubestore/src/import/mod.rs
+++ b/rust/cubestore/cubestore/src/import/mod.rs
@@ -36,7 +36,7 @@ use crate::queryplanner::trace_data_loaded::DataLoadedSize;
 use crate::remotefs::RemoteFs;
 use crate::sql::timestamp_from_string;
 use crate::store::ChunkDataStore;
-//use crate::streaming::StreamingService;
+use crate::streaming::StreamingService;
 use crate::table::data::{append_row, create_array_builders};
 use crate::table::{Row, TableValue};
 use crate::util::batch_memory::columns_vec_buffer_size;
@@ -517,7 +517,7 @@ crate::di_service!(MockImportService, [ImportService]);
 
 pub struct ImportServiceImpl {
     meta_store: Arc<dyn MetaStore>,
-    //streaming_service: Arc<dyn StreamingService>,
+    streaming_service: Arc<dyn StreamingService>,
     chunk_store: Arc<dyn ChunkDataStore>,
     remote_fs: Arc<dyn RemoteFs>,
     config_obj: Arc<dyn ConfigObj>,
@@ -530,7 +530,7 @@ crate::di_service!(ImportServiceImpl, [ImportService]);
 impl ImportServiceImpl {
     pub fn new(
         meta_store: Arc<dyn MetaStore>,
-        //streaming_service: Arc<dyn StreamingService>,
+        streaming_service: Arc<dyn StreamingService>,
         chunk_store: Arc<dyn ChunkDataStore>,
         remote_fs: Arc<dyn RemoteFs>,
         config_obj: Arc<dyn ConfigObj>,
@@ -539,7 +539,7 @@ impl ImportServiceImpl {
     ) -> Arc<ImportServiceImpl> {
         Arc::new(ImportServiceImpl {
             meta_store,
-            //streaming_service,
+            streaming_service,
             chunk_store,
             remote_fs,
             config_obj,
@@ -823,13 +823,13 @@ impl ImportService for ImportServiceImpl {
                 table, location
             )));
         }
-        /* if Table::is_stream_location(location) {
+        if Table::is_stream_location(location) {
             self.streaming_service.stream_table(table, location).await?;
-        } else { */
-        self.do_import(&table, *format, location, data_loaded_size.clone())
-            .await?;
-        self.drop_temp_uploads(&location).await?;
-        //}
+        } else {
+            self.do_import(&table, *format, location, data_loaded_size.clone())
+                .await?;
+            self.drop_temp_uploads(&location).await?;
+        }
 
         Ok(())
     }
@@ -840,11 +840,11 @@ impl ImportService for ImportServiceImpl {
         location: &str,
     ) -> Result<(), CubeError> {
         let table = self.meta_store.get_table_by_id(table_id).await?;
-        /* if Table::is_stream_location(location) {
+        if Table::is_stream_location(location) {
             self.streaming_service
                 .validate_table_location(table, location)
                 .await?;
-        } */
+        }
         Ok(())
     }
 
diff --git a/rust/cubestore/cubestore/src/lib.rs b/rust/cubestore/cubestore/src/lib.rs
index c79c44fd4b2e7..bb9e124341848 100644
--- a/rust/cubestore/cubestore/src/lib.rs
+++ b/rust/cubestore/cubestore/src/lib.rs
@@ -44,7 +44,7 @@ pub mod scheduler;
 pub mod shared;
 pub mod sql;
 pub mod store;
-//pub mod streaming;
+pub mod streaming;
 pub mod sys;
 pub mod table;
 pub mod telemetry;
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
index df4e0c12a4e73..8910861c17f3d 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
@@ -1,4 +1,5 @@
 use datafusion::arrow::array::*;
+use datafusion::arrow::buffer::BooleanBuffer;
 use datafusion::arrow::datatypes::*;
 use std::marker::PhantomData;
 
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
index fc29dc584b45d..56f732a716ab2 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
@@ -7,7 +7,6 @@ use crate::metastore::multi_index::MultiPartition;
 use crate::metastore::table::Table;
 use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
 use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
-use crate::queryplanner::inline_aggregate::sorted_group_values::SortedGroupValues;
 use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
 use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
 use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
@@ -71,6 +70,7 @@ use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
 use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
 use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
 use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::aggregates::group_values::GroupValues;
 use datafusion::physical_plan::aggregates::*;
 use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion::physical_plan::empty::EmptyExec;
@@ -108,6 +108,7 @@ use std::time::SystemTime;
 use tarpc::context::current;
 use tracing::{instrument, Instrument};
 
+use super::new_sorted_group_values;
 use super::InlineAggregateExec;
 use super::InlineAggregateMode;
 
@@ -136,7 +137,7 @@ pub(crate) struct InlineAggregateStream {
     input_done: bool,
 
     accumulators: Vec<Box<dyn GroupsAccumulator>>,
-    group_values: SortedGroupValues,
+    group_values: Box<dyn GroupValues>,
     current_group_indices: Vec<usize>,
 }
 
@@ -191,7 +192,7 @@ impl InlineAggregateStream {
 
         let exec_state = ExecutionState::ReadingInput;
         let current_group_indices = Vec::with_capacity(batch_size);
-        let group_values = SortedGroupValues::try_new(group_schema)?;
+        let group_values = new_sorted_group_values(group_schema)?;
 
         Ok(InlineAggregateStream {
             schema: agg_schema,
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
index 208873256afae..74866b34065c3 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
@@ -1,6 +1,11 @@
 mod column_comparator;
 mod inline_aggregate_stream;
 mod sorted_group_values;
+mod sorted_group_values_rows;
+
+pub use sorted_group_values::SortedGroupValues;
+pub use sorted_group_values_rows::SortedGroupValuesRows;
+
 use crate::cluster::{
     pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams,
 };
@@ -72,6 +77,7 @@ use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
 use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
 use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
 use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::aggregates::group_values::GroupValues;
 use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion::physical_plan::empty::EmptyExec;
 use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType};
@@ -309,3 +315,57 @@ impl ExecutionPlan for InlineAggregateExec {
         CardinalityEffect::LowerEqual
     }
 }
+
+/// Creates a new [`GroupValues`] implementation optimized for sorted input data
+///
+/// Chooses between:
+/// - [`SortedGroupValues`]: Fast column-based implementation for supported types
+/// - [`SortedGroupValuesRows`]: Row-based fallback for all other types (Boolean, Struct, List, etc.)
+pub fn new_sorted_group_values(schema: SchemaRef) -> DFResult<Box<dyn GroupValues>> {
+    // Check if all fields are supported by the column-based implementation
+    if supported_schema(schema.as_ref()) {
+        Ok(Box::new(SortedGroupValues::try_new(schema)?))
+    } else {
+        Ok(Box::new(SortedGroupValuesRows::try_new(schema)?))
+    }
+}
+
+/// Returns true if the schema is supported by [`SortedGroupValues`] (column-based implementation)
+fn supported_schema(schema: &datafusion::arrow::datatypes::Schema) -> bool {
+    schema
+        .fields()
+        .iter()
+        .map(|f| f.data_type())
+        .all(supported_type)
+}
+
+/// Returns true if the data type is supported by [`SortedGroupValues`]
+///
+/// Types not in this list will use the row-based [`SortedGroupValuesRows`] implementation
+fn supported_type(data_type: &DataType) -> bool {
+    matches!(
+        *data_type,
+        DataType::Int8
+            | DataType::Int16
+            | DataType::Int32
+            | DataType::Int64
+            | DataType::UInt8
+            | DataType::UInt16
+            | DataType::UInt32
+            | DataType::UInt64
+            | DataType::Float32
+            | DataType::Float64
+            | DataType::Decimal128(_, _)
+            | DataType::Utf8
+            | DataType::LargeUtf8
+            | DataType::Binary
+            | DataType::LargeBinary
+            | DataType::Date32
+            | DataType::Date64
+            | DataType::Time32(_)
+            | DataType::Time64(_)
+            | DataType::Timestamp(_, _)
+            | DataType::Utf8View
+            | DataType::BinaryView
+    )
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index f53331b45c5e9..d9064aaf9ce16 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -20,6 +20,7 @@ use datafusion::physical_expr::binary_map::OutputType;
 use datafusion::physical_plan::aggregates::group_values::multi_group_by::{
     ByteGroupValueBuilder, ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder,
 };
+use datafusion::physical_plan::aggregates::group_values::GroupValues;
 
 use crate::queryplanner::inline_aggregate::column_comparator::ColumnComparator;
 use crate::{
@@ -70,7 +71,78 @@ impl SortedGroupValues {
         })
     }
 
-    pub fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+        let n_rows = cols[0].len();
+        groups.clear();
+
+        if n_rows == 0 {
+            return Ok(());
+        }
+
+        // Handle first row - compare with last group or create new group
+        let first_group_idx = self.make_new_group_if_needed(cols, 0);
+        groups.push(first_group_idx);
+
+        if n_rows == 1 {
+            return Ok(());
+        }
+
+        // Prepare buffer for vectorized comparison
+        self.equal_to_results.resize(n_rows - 1, true);
+        self.equal_to_results[..n_rows - 1].fill(true);
+
+        // Vectorized comparison: compare row[i] with row[i+1] for all columns
+        for (col, comparator) in cols.iter().zip(&self.comparators) {
+            comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]);
+        }
+
+        // Build groups based on comparison results
+        let mut current_group_idx = first_group_idx;
+        for i in 0..n_rows - 1 {
+            if !self.equal_to_results[i] {
+                // Group boundary detected - add new group
+                for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
+                    group_value.append_val(&cols[col_idx], i + 1);
+                }
+                current_group_idx = self.group_values[0].len() - 1;
+            }
+            groups.push(current_group_idx);
+        }
+
+        Ok(())
+    }
+
+    /// Compare the specified row with the last group and create a new group if different.
+    ///
+    /// This is used to handle the first row of a batch, which needs to be compared
+    /// with the last group from the previous batch to detect group boundaries across batches.
+    ///
+    /// Returns the group index for this row.
+    fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
+        let new_group_needed = if self.group_values[0].len() == 0 {
+            // No groups yet - always create first group
+            true
+        } else {
+            // Compare with last group - if any column differs, need new group
+            self.group_values.iter().enumerate().any(|(i, group_val)| {
+                !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
+            })
+        };
+
+        if new_group_needed {
+            // Add new group with values from this row
+            for (i, group_value) in self.group_values.iter_mut().enumerate() {
+                group_value.append_val(&cols[i], row);
+            }
+        }
+
+        // Return index of the group (either newly created or existing last group)
+        self.group_values[0].len() - 1
+    }
+}
+
+impl GroupValues for SortedGroupValues {
+    fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
         if self.group_values.is_empty() {
             let mut v = Vec::with_capacity(cols.len());
             let mut comparators = Vec::with_capacity(cols.len());
@@ -257,16 +329,16 @@ impl SortedGroupValues {
         self.intern_impl(cols, groups)
     }
 
-    pub fn size(&self) -> usize {
+    fn size(&self) -> usize {
         let group_values_size: usize = self.group_values.iter().map(|v| v.size()).sum();
         group_values_size
     }
 
-    pub fn is_empty(&self) -> bool {
+    fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
-    pub fn len(&self) -> usize {
+    fn len(&self) -> usize {
         if self.group_values.is_empty() {
             return 0;
         }
@@ -274,7 +346,7 @@ impl SortedGroupValues {
         self.group_values[0].len()
     }
 
-    pub fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
+    fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
         let mut output = match emit_to {
             EmitTo::All => {
                 let group_values = mem::take(&mut self.group_values);
@@ -318,73 +390,4 @@ impl SortedGroupValues {
         self.rows_inds.clear();
         self.equal_to_results.clear();
     }
-
-    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
-        let n_rows = cols[0].len();
-        groups.clear();
-
-        if n_rows == 0 {
-            return Ok(());
-        }
-
-        // Handle first row - compare with last group or create new group
-        let first_group_idx = self.make_new_group_if_needed(cols, 0);
-        groups.push(first_group_idx);
-
-        if n_rows == 1 {
-            return Ok(());
-        }
-
-        // Prepare buffer for vectorized comparison
-        self.equal_to_results.resize(n_rows - 1, true);
-        self.equal_to_results[..n_rows - 1].fill(true);
-
-        // Vectorized comparison: compare row[i] with row[i+1] for all columns
-        for (col, comparator) in cols.iter().zip(&self.comparators) {
-            comparator.compare_adjacent(col, &mut self.equal_to_results[..n_rows - 1]);
-        }
-
-        // Build groups based on comparison results
-        let mut current_group_idx = first_group_idx;
-        for i in 0..n_rows - 1 {
-            if !self.equal_to_results[i] {
-                // Group boundary detected - add new group
-                for (col_idx, group_value) in self.group_values.iter_mut().enumerate() {
-                    group_value.append_val(&cols[col_idx], i + 1);
-                }
-                current_group_idx = self.group_values[0].len() - 1;
-            }
-            groups.push(current_group_idx);
-        }
-
-        Ok(())
-    }
-
-    /// Compare the specified row with the last group and create a new group if different.
-    ///
-    /// This is used to handle the first row of a batch, which needs to be compared
-    /// with the last group from the previous batch to detect group boundaries across batches.
-    ///
-    /// Returns the group index for this row.
-    fn make_new_group_if_needed(&mut self, cols: &[ArrayRef], row: usize) -> usize {
-        let new_group_needed = if self.group_values[0].len() == 0 {
-            // No groups yet - always create first group
-            true
-        } else {
-            // Compare with last group - if any column differs, need new group
-            self.group_values.iter().enumerate().any(|(i, group_val)| {
-                !group_val.equal_to(self.group_values[0].len() - 1, &cols[i], row)
-            })
-        };
-
-        if new_group_needed {
-            // Add new group with values from this row
-            for (i, group_value) in self.group_values.iter_mut().enumerate() {
-                group_value.append_val(&cols[i], row);
-            }
-        }
-
-        // Return index of the group (either newly created or existing last group)
-        self.group_values[0].len() - 1
-    }
 }
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs
new file mode 100644
index 0000000000000..199cce192e587
--- /dev/null
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs
@@ -0,0 +1,228 @@
+use datafusion::logical_expr::EmitTo;
+use std::mem::{self, size_of};
+
+use datafusion::arrow::array::{Array, ArrayRef, ListArray, RecordBatch, StructArray};
+use datafusion::arrow::compute::cast;
+use datafusion::arrow::datatypes::{DataType, SchemaRef};
+use datafusion::arrow::row::{RowConverter, Rows, SortField};
+use datafusion::dfschema::internal_err;
+use datafusion::error::{DataFusionError, Result as DFResult};
+use datafusion::physical_plan::aggregates::group_values::GroupValues;
+
+use std::sync::Arc;
+
+/// A [`GroupValues`] implementation optimized for sorted input data
+///
+/// This is a specialized implementation for sorted data that:
+/// - Does not use a hash table (unlike `GroupValuesRows`)
+/// - Detects group boundaries by comparing adjacent rows
+/// - Works for any data type including Boolean, Struct, List, etc.
+///
+/// It uses the arrow-rs [`Rows`] format for efficient row-wise storage and comparison.
+pub struct SortedGroupValuesRows {
+    /// The output schema
+    schema: SchemaRef,
+
+    /// Converter for the group values
+    row_converter: RowConverter,
+
+    /// The actual group by values, stored in arrow [`Row`] format.
+    /// `group_values[i]` holds the group value for group_index `i`.
+    ///
+    /// The row format is used to compare group keys quickly and store
+    /// them efficiently in memory. Quick comparison is especially
+    /// important for multi-column group keys.
+    ///
+    /// [`Row`]: arrow::row::Row
+    group_values: Option<Rows>,
+
+    /// Reused buffer to store rows
+    rows_buffer: Rows,
+}
+
+impl SortedGroupValuesRows {
+    pub fn try_new(schema: SchemaRef) -> DFResult<Self> {
+        let row_converter = RowConverter::new(
+            schema
+                .fields()
+                .iter()
+                .map(|f| SortField::new(f.data_type().clone()))
+                .collect(),
+        )?;
+
+        let starting_rows_capacity = 1000;
+        let starting_data_capacity = 64 * starting_rows_capacity;
+        let rows_buffer =
+            row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
+
+        Ok(Self {
+            schema,
+            row_converter,
+            group_values: None,
+            rows_buffer,
+        })
+    }
+
+    fn intern_impl(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+        // Convert the group keys into the row format
+        self.rows_buffer.clear();
+        self.row_converter.append(&mut self.rows_buffer, cols)?;
+        let n_rows = self.rows_buffer.num_rows();
+
+        groups.clear();
+
+        if n_rows == 0 {
+            return Ok(());
+        }
+
+        let mut group_values = match self.group_values.take() {
+            Some(group_values) => group_values,
+            None => self.row_converter.empty_rows(0, 0),
+        };
+
+        // Handle first row - compare with last group or create new group
+        let new_group_needed = if group_values.num_rows() == 0 {
+            // No groups yet - always create first group
+            true
+        } else {
+            // Compare with last group - if differs, need new group
+            let last_group_idx = group_values.num_rows() - 1;
+            group_values.row(last_group_idx) != self.rows_buffer.row(0)
+        };
+
+        if new_group_needed {
+            // Add new group with values from first row
+            group_values.push(self.rows_buffer.row(0));
+        }
+
+        let first_group_idx = group_values.num_rows() - 1;
+        groups.push(first_group_idx);
+
+        if n_rows == 1 {
+            self.group_values = Some(group_values);
+            return Ok(());
+        }
+
+        // Build groups based on comparison of adjacent rows
+        let mut current_group_idx = first_group_idx;
+        for i in 0..n_rows - 1 {
+            // Compare row[i] with row[i+1]
+            if self.rows_buffer.row(i) != self.rows_buffer.row(i + 1) {
+                // Group boundary detected - add new group
+                group_values.push(self.rows_buffer.row(i + 1));
+                current_group_idx = group_values.num_rows() - 1;
+            }
+            groups.push(current_group_idx);
+        }
+
+        self.group_values = Some(group_values);
+        Ok(())
+    }
+}
+
+impl GroupValues for SortedGroupValuesRows {
+    fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> DFResult<()> {
+        self.intern_impl(cols, groups)
+    }
+
+    fn size(&self) -> usize {
+        let group_values_size = self.group_values.as_ref().map(|v| v.size()).unwrap_or(0);
+        self.row_converter.size() + group_values_size + self.rows_buffer.size()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    fn len(&self) -> usize {
+        self.group_values
+            .as_ref()
+            .map(|group_values| group_values.num_rows())
+            .unwrap_or(0)
+    }
+
+    fn emit(&mut self, emit_to: EmitTo) -> DFResult<Vec<ArrayRef>> {
+        let mut group_values = self
+            .group_values
+            .take()
+            .expect("Can not emit from empty rows");
+
+        let mut output = match emit_to {
+            EmitTo::All => {
+                let output = self.row_converter.convert_rows(&group_values)?;
+                group_values.clear();
+                output
+            }
+            EmitTo::First(n) => {
+                let groups_rows = group_values.iter().take(n);
+                let output = self.row_converter.convert_rows(groups_rows)?;
+                // Clear out first n group keys by copying them to a new Rows.
+                let mut new_group_values = self.row_converter.empty_rows(0, 0);
+                for row in group_values.iter().skip(n) {
+                    new_group_values.push(row);
+                }
+                std::mem::swap(&mut new_group_values, &mut group_values);
+                output
+            }
+        };
+
+        // Handle dictionary encoding for output
+        for (field, array) in self.schema.fields.iter().zip(&mut output) {
+            let expected = field.data_type();
+            *array =
+                dictionary_encode_if_necessary(Arc::<dyn Array>::clone(array), expected)?;
+        }
+
+        self.group_values = Some(group_values);
+        Ok(output)
+    }
+
+    fn clear_shrink(&mut self, _batch: &RecordBatch) {
+        self.group_values = self.group_values.take().map(|mut rows| {
+            rows.clear();
+            rows
+        });
+    }
+}
+
+fn dictionary_encode_if_necessary(
+    array: ArrayRef,
+    expected: &DataType,
+) -> DFResult<ArrayRef> {
+    match (expected, array.data_type()) {
+        (DataType::Struct(expected_fields), _) => {
+            let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
+            let arrays = expected_fields
+                .iter()
+                .zip(struct_array.columns())
+                .map(|(expected_field, column)| {
+                    dictionary_encode_if_necessary(
+                        Arc::<dyn Array>::clone(column),
+                        expected_field.data_type(),
+                    )
+                })
+                .collect::<DFResult<Vec<_>>>()?;
+
+            Ok(Arc::new(StructArray::try_new(
+                expected_fields.clone(),
+                arrays,
+                struct_array.nulls().cloned(),
+            )?))
+        }
+        (DataType::List(expected_field), &DataType::List(_)) => {
+            let list = array.as_any().downcast_ref::<ListArray>().unwrap();
+
+            Ok(Arc::new(ListArray::try_new(
+                Arc::<datafusion::arrow::datatypes::Field>::clone(expected_field),
+                list.offsets().clone(),
+                dictionary_encode_if_necessary(
+                    Arc::<dyn Array>::clone(list.values()),
+                    expected_field.data_type(),
+                )?,
+                list.nulls().cloned(),
+            )?))
+        }
+        (DataType::Dictionary(_, _), _) => Ok(cast(array.as_ref(), expected)?),
+        (_, _) => Ok(Arc::<dyn Array>::clone(&array)),
+    }
+}
diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
index 0359e64c476db..5746261938e1f 100644
--- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
@@ -145,7 +145,7 @@ fn pre_optimize_physical_plan(
     let p = ensure_partition_merge(p)?;
 
     // Replace sorted AggregateExec with InlineAggregateExec for better performance
-    let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?;
+    //let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?;
 
     Ok(p)
 }
diff --git a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
index 7072ad8c59180..ccb164a15a8a7 100644
--- a/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/query_executor.rs
@@ -1178,7 +1178,6 @@ impl ExecutionPlan for CubeTableExec {
         mut partition: usize,
         context: Arc<TaskContext>,
     ) -> Result<SendableRecordBatchStream, DataFusionError> {
-        println!("!!! Table exec: {}, {}", self.partition_execs.len(), partition);
         let exec = self
             .partition_execs
             .iter()
diff --git a/rust/cubestore/cubestore/src/sql/mod.rs b/rust/cubestore/cubestore/src/sql/mod.rs
index 2cacd97ffe6af..67d2fb351967e 100644
--- a/rust/cubestore/cubestore/src/sql/mod.rs
+++ b/rust/cubestore/cubestore/src/sql/mod.rs
@@ -2846,6 +2846,7 @@ mod tests {
                 let mut bools = Vec::new();
                 for i in 0..1000 {
                     bools.push(i % (batch + 1) == 0);
+
                 }
 
                 let values = bools.into_iter().map(|b| format!("({})", b)).join(", ");
diff --git a/rust/cubestore/rust-toolchain.toml b/rust/cubestore/rust-toolchain.toml
index ad8132da3e1bc..935f99e36558c 100644
--- a/rust/cubestore/rust-toolchain.toml
+++ b/rust/cubestore/rust-toolchain.toml
@@ -1,4 +1,4 @@
 [toolchain]
-channel = "nightly-2025-08-01"
+channel = "nightly-2024-10-30"
 components = ["rustfmt", "clippy"]
 profile = "minimal"

From fb5799e1ac5f919f96e803faa38a81ee39390928 Mon Sep 17 00:00:00 2001
From: Aleksandr Romanenko <alex.romanenko@cube.dev>
Date: Thu, 30 Oct 2025 19:50:58 +0100
Subject: [PATCH 8/9] in work

---
 .../queryplanner/inline_aggregate/column_comparator.rs    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
index 8910861c17f3d..2e3c6bb50e07d 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
@@ -119,8 +119,8 @@ where
                 }
             } else {
                 // Use iterator which handles nulls efficiently
-                let mut iter1 = array.iter();
-                let mut iter2 = array.iter().skip(1);
+                let iter1 = array.iter();
+                let iter2 = array.iter().skip(1);
 
                 for (i, (v1, v2)) in iter1.zip(iter2).enumerate() {
                     if equal_results[i] {
@@ -174,8 +174,8 @@ where
                 }
             } else {
                 // Handle nulls via iterator
-                let mut iter1 = array.iter();
-                let mut iter2 = array.iter().skip(1);
+                let iter1 = array.iter();
+                let iter2 = array.iter().skip(1);
 
                 for (i, (v1, v2)) in iter1.zip(iter2).enumerate() {
                     if equal_results[i] {

From 5762d5c802718bfa59788841380e635712ee1399 Mon Sep 17 00:00:00 2001
From: Aleksandr Romanenko <alex.romanenko@cube.dev>
Date: Fri, 31 Oct 2025 14:32:24 +0100
Subject: [PATCH 9/9] in work

---
 rust/cubestore/Cargo.lock                     |  26 +++
 rust/cubestore/cubedatasketches/src/error.rs  |   8 +-
 rust/cubestore/cubedatasketches/src/native.rs |  16 +-
 rust/cubestore/cubehll/src/error.rs           |   8 +-
 rust/cubestore/cubehll/src/instance.rs        | 156 +++++++++---------
 rust/cubestore/cubehll/src/sketch.rs          |  24 +--
 .../cubestore-sql-tests/src/benches.rs        |  36 ++--
 .../cubestore-sql-tests/src/files.rs          |   6 +-
 rust/cubestore/cubestore-sql-tests/src/lib.rs |   2 +-
 .../cubestore-sql-tests/src/multiproc.rs      |   6 +-
 .../cubestore/cubestore-sql-tests/src/rows.rs |   2 +-
 .../cubestore-sql-tests/src/tests.rs          | 108 ++++++------
 .../cubestore-sql-tests/tests/cluster.rs      |   2 +-
 .../cubestore-sql-tests/tests/migration.rs    |   4 +-
 rust/cubestore/cubestore/Cargo.toml           |   8 +-
 .../cubestore/benches/cachestore_queue.rs     |   8 +-
 .../cubestore/cubestore/src/bin/cubestored.rs |   2 +-
 rust/cubestore/cubestore/src/config/mod.rs    |  46 +++---
 .../inline_aggregate/column_comparator.rs     |   1 -
 .../inline_aggregate_stream.rs                | 148 +----------------
 .../src/queryplanner/inline_aggregate/mod.rs  | 102 ++----------
 .../inline_aggregate/sorted_group_values.rs   |   7 +-
 .../sorted_group_values_rows.rs               |  15 +-
 .../cubestore/src/queryplanner/mod.rs         |   2 +-
 .../src/queryplanner/optimizations/mod.rs     |   2 +-
 .../src/queryplanner/physical_plan_flags.rs   |  10 +-
 .../src/queryplanner/pretty_printers.rs       |   2 +-
 rust/cubestore/cubezetasketch/src/data.rs     |  12 +-
 .../cubezetasketch/src/difference_encoding.rs |  10 +-
 rust/cubestore/cubezetasketch/src/encoding.rs |  27 ++-
 rust/cubestore/cubezetasketch/src/error.rs    |  10 +-
 rust/cubestore/cubezetasketch/src/normal.rs   |  18 +-
 rust/cubestore/cubezetasketch/src/sketch.rs   |  28 ++--
 rust/cubestore/cubezetasketch/src/sparse.rs   |  30 ++--
 rust/cubestore/cubezetasketch/src/state.rs    |  20 +--
 35 files changed, 356 insertions(+), 556 deletions(-)

diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock
index 2bb93b2da9777..6f2149f1d840b 100644
--- a/rust/cubestore/Cargo.lock
+++ b/rust/cubestore/Cargo.lock
@@ -1723,6 +1723,7 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308"
 [[package]]
 name = "datafusion"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "arrow-ipc",
@@ -1775,6 +1776,7 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1793,6 +1795,7 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog-listing"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1813,6 +1816,7 @@ dependencies = [
 [[package]]
 name = "datafusion-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1835,6 +1839,7 @@ dependencies = [
 [[package]]
 name = "datafusion-common-runtime"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "log",
  "tokio",
@@ -1843,6 +1848,7 @@ dependencies = [
 [[package]]
 name = "datafusion-datasource"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "async-compression 0.4.17",
@@ -1875,10 +1881,12 @@ dependencies = [
 [[package]]
 name = "datafusion-doc"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 
 [[package]]
 name = "datafusion-execution"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "dashmap",
@@ -1898,6 +1906,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "chrono",
@@ -1917,6 +1926,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1928,6 +1938,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1955,6 +1966,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1974,6 +1986,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -1985,6 +1998,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-nested"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "arrow-ord",
@@ -2004,6 +2018,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-table"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "async-trait",
@@ -2018,6 +2033,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "datafusion-common",
  "datafusion-doc",
@@ -2033,6 +2049,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -2041,6 +2058,7 @@ dependencies = [
 [[package]]
 name = "datafusion-macros"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -2050,6 +2068,7 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "chrono",
@@ -2067,6 +2086,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2087,6 +2107,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2099,6 +2120,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2116,6 +2138,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-plan"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "ahash 0.8.11",
  "arrow",
@@ -2147,6 +2170,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "chrono",
@@ -2161,6 +2185,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto-common"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -2170,6 +2195,7 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "46.0.1"
+source = "git+https://github.com/cube-js/arrow-datafusion?branch=cube-46.0.1#2e928bb7230630e8d8e66ed8fcc1b6d759302a8a"
 dependencies = [
  "arrow",
  "bigdecimal 0.4.8",
diff --git a/rust/cubestore/cubedatasketches/src/error.rs b/rust/cubestore/cubedatasketches/src/error.rs
index 1459d86f3dbaf..6ea4f5705a377 100644
--- a/rust/cubestore/cubedatasketches/src/error.rs
+++ b/rust/cubestore/cubedatasketches/src/error.rs
@@ -30,21 +30,21 @@ impl Display for DataSketchesError {
 
 impl DataSketchesError {
     pub fn new<Str: ToString>(message: Str) -> Self {
-        return Self {
+        Self {
             message: message.to_string(),
-        };
+        }
     }
 }
 
 impl From<std::io::Error> for DataSketchesError {
     fn from(err: std::io::Error) -> Self {
-        return DataSketchesError::new(err);
+        DataSketchesError::new(err)
     }
 }
 
 #[cfg(not(target_os = "windows"))]
 impl From<dsrs::DataSketchesError> for DataSketchesError {
     fn from(err: dsrs::DataSketchesError) -> Self {
-        return DataSketchesError::new(err);
+        DataSketchesError::new(err)
     }
 }
diff --git a/rust/cubestore/cubedatasketches/src/native.rs b/rust/cubestore/cubedatasketches/src/native.rs
index 1cbec0c0ecf97..e379c43098767 100644
--- a/rust/cubestore/cubedatasketches/src/native.rs
+++ b/rust/cubestore/cubedatasketches/src/native.rs
@@ -37,22 +37,22 @@ impl Debug for HLLDataSketch {
 
 impl HLLDataSketch {
     pub fn read(data: &[u8]) -> Result<Self> {
-        return Ok(Self {
+        Ok(Self {
             instance: HLLSketch::deserialize(data)?,
-        });
+        })
     }
 
     pub fn cardinality(&self) -> u64 {
-        return self.instance.estimate().round() as u64;
+        self.instance.estimate().round() as u64
     }
 
     pub fn get_lg_config_k(&self) -> u8 {
-        return self.instance.get_lg_config_k();
+        self.instance.get_lg_config_k()
     }
 
     pub fn write(&self) -> Vec<u8> {
         // TODO(ovr): Better way?
-        self.instance.serialize().as_ref().iter().copied().collect()
+        self.instance.serialize().as_ref().to_vec()
     }
 }
 
@@ -80,13 +80,13 @@ impl HLLUnionDataSketch {
     }
 
     pub fn get_lg_config_k(&self) -> u8 {
-        return self.instance.get_lg_config_k();
+        self.instance.get_lg_config_k()
     }
 
     pub fn write(&self) -> Vec<u8> {
         let sketch = self.instance.sketch(HLLType::HLL_4);
         // TODO(ovr): Better way?
-        sketch.serialize().as_ref().iter().copied().collect()
+        sketch.serialize().as_ref().to_vec()
     }
 
     pub fn merge_with(&mut self, other: HLLDataSketch) -> Result<()> {
@@ -107,6 +107,6 @@ impl HLLUnionDataSketch {
         //
         // This function is supposed to be exact, but it is not exact.
 
-        return 32 + k;
+        32 + k
     }
 }
diff --git a/rust/cubestore/cubehll/src/error.rs b/rust/cubestore/cubehll/src/error.rs
index 428a00639ed0d..978bb2f114abf 100644
--- a/rust/cubestore/cubehll/src/error.rs
+++ b/rust/cubestore/cubehll/src/error.rs
@@ -14,20 +14,20 @@ impl Display for HllError {
 
 impl HllError {
     pub fn new<Str: ToString>(message: Str) -> HllError {
-        return HllError {
+        HllError {
             message: message.to_string(),
-        };
+        }
     }
 }
 
 impl From<std::io::Error> for HllError {
     fn from(err: std::io::Error) -> Self {
-        return HllError::new(err);
+        HllError::new(err)
     }
 }
 
 impl From<serde_json::Error> for HllError {
     fn from(err: serde_json::Error) -> Self {
-        return HllError::new(err);
+        HllError::new(err)
     }
 }
diff --git a/rust/cubestore/cubehll/src/instance.rs b/rust/cubestore/cubehll/src/instance.rs
index 62ff469805bea..a9149d87d74ee 100644
--- a/rust/cubestore/cubehll/src/instance.rs
+++ b/rust/cubestore/cubehll/src/instance.rs
@@ -36,16 +36,16 @@ pub const MAX_BUCKETS: u32 = 65536;
 impl HllInstance {
     pub fn new(num_buckets: u32) -> Result<HllInstance> {
         assert!(num_buckets <= MAX_BUCKETS);
-        return Ok(HllInstance::Sparse(SparseHll::new(index_bit_length(
+        Ok(HllInstance::Sparse(SparseHll::new(index_bit_length(
             num_buckets,
-        )?)?));
+        )?)?))
     }
 
     pub fn num_buckets(&self) -> u32 {
-        return match self {
+        match self {
             Sparse(s) => number_of_buckets(s.index_bit_len),
             Dense(d) => number_of_buckets(d.index_bit_len),
-        };
+        }
     }
 
     /// Callers must check that `num_buckets()` is the same for `self` and `other`.
@@ -61,10 +61,10 @@ impl HllInstance {
     }
 
     pub fn index_bit_len(&self) -> u8 {
-        return match self {
+        match self {
             Sparse(s) => s.index_bit_len,
             Dense(d) => d.index_bit_len,
-        };
+        }
     }
 
     /// Returns true iff `self.make_dense_if_necessary` has to be run.
@@ -75,15 +75,15 @@ impl HllInstance {
                 l.merge_with(r);
                 // We need the make this call, but borrow checker won't let us use `self` here.
                 // self.make_dense_if_necessary();
-                return true;
+                true
             }
             (Dense(l), Sparse(r)) => {
                 l.merge_with_sparse(r);
-                return false;
+                false
             }
             (l, Dense(r)) => {
                 l.ensure_dense().merge_with(r);
-                return false;
+                false
             }
         }
     }
@@ -122,7 +122,7 @@ impl HllInstance {
                     "Cannot read HLL with undefined encoding".to_string(),
                 ))
             }
-            n if 1 <= n && n <= 4 => n,
+            n if (1..=4).contains(&n) => n,
             n => {
                 return Err(HllError::new(format!(
                     "Unknown HLL encoding ordinal: {}",
@@ -131,7 +131,7 @@ impl HllInstance {
             }
         };
         let reg_width = 1 + ((data[1] & 0b11100000) >> 5);
-        if reg_width < 1 || 6 < reg_width {
+        if !(1..=6).contains(&reg_width) {
             return Err(HllError::new(format!(
                 "Register width must be between 1 and 6, got {}",
                 reg_width
@@ -139,7 +139,7 @@ impl HllInstance {
         }
         let log_num_buckets = data[1] & 0b00011111;
         // Note: the upper limit in storage spec is 31, but our implementation is limited to 16.
-        if log_num_buckets < 4 || 16 < log_num_buckets {
+        if !(4..=16).contains(&log_num_buckets) {
             return Err(HllError::new(format!(
                 "Log2m must be between 4 and 16, got {}",
                 log_num_buckets
@@ -158,7 +158,7 @@ impl HllInstance {
                         data.len()
                     )));
                 }
-                return HllInstance::new(num_buckets);
+                HllInstance::new(num_buckets)
             }
             ENC_EXPLICIT => {
                 if data.len() % 8 != 0 {
@@ -216,11 +216,11 @@ impl HllInstance {
                     values.push(zeroes as u8);
                 }
 
-                return Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values(
+                Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values(
                     log_num_buckets,
                     indices,
                     &values,
-                )?));
+                )?))
             }
             ENC_SPARSE => {
                 let mut cursor = BitCursor::new(data);
@@ -231,11 +231,11 @@ impl HllInstance {
                     indices.push((e >> reg_width) as u32);
                     values.push((e & ((1 << reg_width) - 1)) as u8);
                 }
-                return Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values(
+                Ok(HllInstance::Sparse(SparseHll::new_from_indices_and_values(
                     log_num_buckets,
                     indices,
                     &values,
-                )?));
+                )?))
             }
             ENC_FULL => {
                 let expected_bits = num_buckets * reg_width as u32;
@@ -253,10 +253,10 @@ impl HllInstance {
                 for _ in 0..num_buckets {
                     values.push(cursor.read_bits(reg_width as usize).unwrap() as u8)
                 }
-                return Ok(HllInstance::Dense(DenseHll::new_from_entries(
+                Ok(HllInstance::Dense(DenseHll::new_from_entries(
                     log_num_buckets,
                     values,
-                )?));
+                )?))
             }
             enc => panic!("Unhandled encoding ordinal {}", enc),
         }
@@ -306,19 +306,19 @@ impl HllInstance {
         if data.is_empty() {
             return Err(HllError::new("hll input data is empty"));
         }
-        return match data[0] {
+        match data[0] {
             TAG_SPARSE_V2 => Ok(HllInstance::Sparse(SparseHll::read(&data[1..])?)),
             TAG_DENSE_V1 => Ok(HllInstance::Dense(DenseHll::read_v1(&data[1..])?)),
             TAG_DENSE_V2 => Ok(HllInstance::Dense(DenseHll::read(&data[1..])?)),
             _ => Err(HllError::new(format!("invalid hll format tag {}", data[0]))),
-        };
+        }
     }
 
     pub fn write(&self) -> Vec<u8> {
-        return match self {
+        match self {
             Sparse(s) => s.write(),
             Dense(s) => s.write(),
-        };
+        }
     }
 
     fn ensure_dense(&mut self) -> &mut DenseHll {
@@ -379,10 +379,10 @@ impl SparseHll {
 
     pub fn new(index_bit_len: u8) -> Result<SparseHll> {
         SparseHll::is_valid_bit_len(index_bit_len)?;
-        return Ok(SparseHll {
+        Ok(SparseHll {
             index_bit_len,
             entries: Vec::with_capacity(1),
-        });
+        })
     }
 
     fn new_from_indices_and_values(
@@ -419,8 +419,7 @@ impl SparseHll {
         }
 
         // Sort by bucket index.
-        entries
-            .sort_unstable_by(|l, r| (l >> (32 - index_bit_len)).cmp(&(r >> (32 - index_bit_len))));
+        entries.sort_unstable_by_key(|l| l >> (32 - index_bit_len));
 
         Ok(SparseHll {
             index_bit_len,
@@ -442,10 +441,10 @@ impl SparseHll {
         if c.position() != data.len() as u64 {
             return Err(HllError::new("input is too big"));
         }
-        return Ok(SparseHll {
+        Ok(SparseHll {
             index_bit_len,
             entries,
-        });
+        })
     }
 
     pub fn write(&self) -> Vec<u8> {
@@ -459,7 +458,7 @@ impl SparseHll {
         for e in &self.entries {
             r.write_u32::<LittleEndian>(*e).unwrap();
         }
-        return r;
+        r
     }
 
     pub fn cardinality(&self) -> u64 {
@@ -468,7 +467,7 @@ impl SparseHll {
         // while in the sparse regime.
         let total_buckets = number_of_buckets(SparseHll::EXTENDED_PREFIX_BITS);
         let zero_buckets = total_buckets - self.entries.len() as u32;
-        return linear_counting(zero_buckets, total_buckets).round() as u64;
+        linear_counting(zero_buckets, total_buckets).round() as u64
     }
 
     pub fn merge_with(&mut self, o: &SparseHll) {
@@ -479,11 +478,11 @@ impl SparseHll {
         // TODO: this can panic if Sparse HLL had too much precision.
         let mut d = DenseHll::new(self.index_bit_len);
         self.each_bucket(|bucket, zeros| d.insert(bucket, zeros));
-        return d;
+        d
     }
 
     fn estimate_in_memory_size(&self) -> usize {
-        return size_of::<SparseHll>() + 32 * self.entries.capacity();
+        size_of::<SparseHll>() + 32 * self.entries.capacity()
     }
 
     fn each_bucket<F>(&self, mut f: F)
@@ -555,27 +554,27 @@ impl SparseHll {
         }
 
         result.resize(index, 0);
-        return result;
+        result
     }
 
     fn encode_entry(bucket_index: u32, value: u8) -> u32 {
-        return (bucket_index << SparseHll::VALUE_BITS) | value as u32;
+        (bucket_index << SparseHll::VALUE_BITS) | value as u32
     }
 
     fn decode_bucket_value(entry: u32) -> u8 {
-        return (entry & SparseHll::VALUE_MASK) as u8;
+        (entry & SparseHll::VALUE_MASK) as u8
     }
 
     fn decode_bucket_index(entry: u32) -> u32 {
-        return SparseHll::decode_bucket_index_with_bit_len(SparseHll::EXTENDED_PREFIX_BITS, entry);
+        SparseHll::decode_bucket_index_with_bit_len(SparseHll::EXTENDED_PREFIX_BITS, entry)
     }
 
     fn decode_bucket_index_with_bit_len(index_bit_len: u8, entry: u32) -> u32 {
-        return entry >> (32 - index_bit_len);
+        entry >> (32 - index_bit_len)
     }
 
     fn is_valid_bit_len(index_bit_len: u8) -> Result<()> {
-        if 1 <= index_bit_len && index_bit_len <= SparseHll::EXTENDED_PREFIX_BITS {
+        if (1..=SparseHll::EXTENDED_PREFIX_BITS).contains(&index_bit_len) {
             Ok(())
         } else {
             Err(HllError::new(format!(
@@ -615,15 +614,15 @@ impl DenseHll {
     pub fn new(index_bit_len: u8) -> DenseHll {
         DenseHll::is_valid_bit_len(index_bit_len).unwrap();
 
-        let num_buckets = number_of_buckets(index_bit_len) as u32;
-        return DenseHll {
+        let num_buckets = number_of_buckets(index_bit_len);
+        DenseHll {
             index_bit_len,
             baseline: 0,
             baseline_count: num_buckets,
             deltas: vec![0; (num_buckets * DenseHll::BITS_PER_BUCKET / 8) as usize],
             overflow_buckets: Vec::new(),
             overflow_values: Vec::new(),
-        };
+        }
     }
 
     pub fn new_from_entries(index_bit_len: u8, values: Vec<u8>) -> Result<DenseHll> {
@@ -674,9 +673,9 @@ impl DenseHll {
 
     pub fn read_v1(_data: &[u8]) -> Result<DenseHll> {
         // TODO: implement this for completeness. Airlift can read Dense HLL in V1 format.
-        return Err(HllError::new(
+        Err(HllError::new(
             "reading of v1 dense sketches is not implemented",
-        ));
+        ))
     }
 
     pub fn read(data: &[u8]) -> Result<DenseHll> {
@@ -725,14 +724,14 @@ impl DenseHll {
             }
         }
 
-        return Ok(DenseHll {
+        Ok(DenseHll {
             index_bit_len,
             baseline,
             baseline_count,
             deltas,
             overflow_buckets,
             overflow_values,
-        });
+        })
     }
 
     pub fn write(&self) -> Vec<u8> {
@@ -753,7 +752,7 @@ impl DenseHll {
             r.write_u16::<LittleEndian>(e.try_into().unwrap()).unwrap();
         }
         r.extend_from_slice(&of_values);
-        return r;
+        r
     }
 
     pub fn cardinality(&self) -> u64 {
@@ -774,7 +773,7 @@ impl DenseHll {
         }
 
         let estimate = (alpha(self.index_bit_len) * num_buckets as f64 * num_buckets as f64) / sum;
-        return self.correct_bias(estimate).round() as u64;
+        self.correct_bias(estimate).round() as u64
     }
 
     pub fn merge_with_sparse(&mut self, other: &SparseHll) {
@@ -819,14 +818,14 @@ impl DenseHll {
                 if delta1 == DenseHll::MAX_DELTA {
                     overflow_entry = self.find_overflow_entry(bucket);
                     if let Some(oe) = overflow_entry {
-                        value1 += self.overflow_values[oe] as u8;
+                        value1 += self.overflow_values[oe];
                     }
                 } else {
                     overflow_entry = None
                 }
 
                 if delta2 == DenseHll::MAX_DELTA {
-                    value2 += other.get_overflow(bucket) as u8;
+                    value2 += other.get_overflow(bucket);
                 }
 
                 let new_value = max(value1, value2);
@@ -843,7 +842,7 @@ impl DenseHll {
                 bucket += 1;
             }
 
-            self.deltas[i] = new_slot as u8;
+            self.deltas[i] = new_slot;
         }
 
         self.baseline = new_baseline as u8;
@@ -919,15 +918,14 @@ impl DenseHll {
 
             bias = (((raw_estimate - x0) * (y1 - y0)) / (x1 - x0)) + y0;
         }
-        return raw_estimate - bias;
+        raw_estimate - bias
     }
 
     fn find_overflow_entry(&self, bucket: u32) -> Option<usize> {
-        return self
-            .overflow_buckets
+        self.overflow_buckets
             .iter()
             .find_position(|x| **x == bucket)
-            .map(|x| x.0);
+            .map(|x| x.0)
     }
 
     fn adjust_baseline_if_needed(&mut self) {
@@ -987,7 +985,7 @@ impl DenseHll {
         } else if let Some(oe) = overflow_entry {
             self.remove_overflow(oe);
         }
-        return delta as u8;
+        delta
     }
 
     fn add_overflow(&mut self, bucket: u32, overflow: u8) {
@@ -1020,7 +1018,7 @@ impl DenseHll {
         if delta == DenseHll::MAX_DELTA as u32 {
             delta += self.get_overflow(bucket) as u32;
         }
-        return self.baseline as u32 + delta;
+        self.baseline as u32 + delta
     }
 
     fn get_overflow(&self, bucket: u32) -> u8 {
@@ -1029,41 +1027,41 @@ impl DenseHll {
                 return self.overflow_values[i];
             }
         }
-        return 0;
+        0
     }
 
     fn get_delta(&self, bucket: u32) -> u8 {
-        return DenseHll::get_delta_impl(&self.deltas, bucket);
+        DenseHll::get_delta_impl(&self.deltas, bucket)
     }
 
     fn get_delta_impl(deltas: &[u8], bucket: u32) -> u8 {
         let slot = DenseHll::bucket_to_slot(bucket) as usize;
-        return (deltas[slot] >> DenseHll::shift_for_bucket(bucket)) & DenseHll::BUCKET_MASK;
+        (deltas[slot] >> DenseHll::shift_for_bucket(bucket)) & DenseHll::BUCKET_MASK
     }
 
     fn set_delta(&mut self, bucket: u32, value: u8) {
         let slot = DenseHll::bucket_to_slot(bucket) as usize;
 
         // clear the old value
-        let clear_mask = (DenseHll::BUCKET_MASK << DenseHll::shift_for_bucket(bucket)) as u8;
+        let clear_mask = DenseHll::BUCKET_MASK << DenseHll::shift_for_bucket(bucket);
         self.deltas[slot] &= !clear_mask;
 
         // set the new value
-        let set_mask = (value << DenseHll::shift_for_bucket(bucket)) as u8;
+        let set_mask = value << DenseHll::shift_for_bucket(bucket);
         self.deltas[slot] |= set_mask;
     }
 
     fn bucket_to_slot(bucket: u32) -> u32 {
-        return bucket >> 1;
+        bucket >> 1
     }
 
     fn shift_for_bucket(bucket: u32) -> u32 {
         // ((1 - bucket) % 2) * BITS_PER_BUCKET
-        return ((!bucket) & 1) << 2;
+        ((!bucket) & 1) << 2
     }
 
     fn is_valid_bit_len(index_bit_len: u8) -> Result<()> {
-        if 1 <= index_bit_len && index_bit_len <= 16 {
+        if (1..=16).contains(&index_bit_len) {
             Ok(())
         } else {
             Err(HllError::new(format!(
@@ -1079,7 +1077,7 @@ impl DenseHll {
         // to dense representation can happen at different points.
 
         // note: we don't take into account overflow entries since their number can vary.
-        return size_of::<DenseHll>() + /*deltas*/8 * number_of_buckets(index_bit_len) as usize / 2;
+        size_of::<DenseHll>() + /*deltas*/8 * number_of_buckets(index_bit_len) as usize / 2
     }
 
     /// Unlike airlift, we provide a copy of the overflow_bucket to to the reference semantics.
@@ -1114,7 +1112,7 @@ impl DenseHll {
             }
         }
 
-        return (of_buckets, of_values);
+        (of_buckets, of_values)
     }
 
     #[allow(dead_code)]
@@ -1186,7 +1184,7 @@ fn search(raw_estimate: f64, estimate_curve: &[f64]) -> i32 {
         }
     }
 
-    return -(low as i32 + 1);
+    -(low as i32 + 1)
 }
 
 fn index_bit_length(n: u32) -> Result<u8> {
@@ -1199,36 +1197,36 @@ fn index_bit_length(n: u32) -> Result<u8> {
 
 #[allow(dead_code)]
 fn compute_index(hash: u64, index_bit_len: u8) -> u32 {
-    return (hash >> (64 - index_bit_len)) as u32;
+    (hash >> (64 - index_bit_len)) as u32
 }
 
 fn compute_value(hash: u64, index_bit_len: u8) -> u8 {
-    return number_of_leading_zeros(hash, index_bit_len) + 1;
+    number_of_leading_zeros(hash, index_bit_len) + 1
 }
 
 #[allow(dead_code)]
 fn number_of_leading_zeros(hash: u64, index_bit_len: u8) -> u8 {
     // place a 1 in the LSB to preserve the original number of leading zeros if the hash happens to be 0.
     let value = (hash << index_bit_len) | (1 << (index_bit_len - 1));
-    return value.leading_zeros() as u8;
+    value.leading_zeros() as u8
 }
 
 fn number_of_buckets(index_bit_len: u8) -> u32 {
-    return 1 << index_bit_len;
+    1 << index_bit_len
 }
 
 fn alpha(index_bit_len: u8) -> f64 {
-    return match index_bit_len {
+    match index_bit_len {
         4 => 0.673,
         5 => 0.697,
         6 => 0.709,
         _ => 0.7213 / (1. + 1.079 / number_of_buckets(index_bit_len) as f64),
-    };
+    }
 }
 
 fn linear_counting(zero_buckets: u32, total_buckets: u32) -> f64 {
     let total_f = total_buckets as f64;
-    return total_f * (total_f / (zero_buckets as f64)).ln();
+    total_f * (total_f / (zero_buckets as f64)).ln()
 }
 
 // const TAG_SPARSE_V1: u8 = 0; // Unsupported.
@@ -1273,7 +1271,7 @@ impl BitCursor<'_> {
                 self.bit_pos = 0;
             }
         }
-        return Some(r);
+        Some(r)
     }
 }
 
@@ -1754,10 +1752,10 @@ mod tests {
 
     impl TestingHll {
         pub fn new(index_bit_len: u8) -> TestingHll {
-            return TestingHll {
+            TestingHll {
                 index_bit_length: index_bit_len,
                 buckets: vec![0; number_of_buckets(index_bit_len) as usize],
-            };
+            }
         }
 
         pub fn insert_hash(&mut self, hash: u64) {
@@ -1768,7 +1766,7 @@ mod tests {
         }
 
         pub fn buckets(&self) -> &[u32] {
-            return &self.buckets;
+            &self.buckets
         }
     }
 }
diff --git a/rust/cubestore/cubehll/src/sketch.rs b/rust/cubestore/cubehll/src/sketch.rs
index d897c719f65ed..11bd6288855b2 100644
--- a/rust/cubestore/cubehll/src/sketch.rs
+++ b/rust/cubestore/cubehll/src/sketch.rs
@@ -31,46 +31,46 @@ impl HllSketch {
     /// Create a sketch for an empty set of elements.
     /// The number of buckets is a power of two, not more than 65536.
     pub fn new(num_buckets: u32) -> Result<HllSketch> {
-        return Ok(HllSketch {
+        Ok(HllSketch {
             instance: HllInstance::new(num_buckets)?,
-        });
+        })
     }
 
     /// Maximum number of buckets used for this representation.
     pub fn num_buckets(&self) -> u32 {
-        return self.instance.num_buckets();
+        self.instance.num_buckets()
     }
 
     pub fn index_bit_len(&self) -> u8 {
-        return self.instance.index_bit_len();
+        self.instance.index_bit_len()
     }
 
     pub fn read(data: &[u8]) -> Result<HllSketch> {
-        return Ok(HllSketch {
+        Ok(HllSketch {
             instance: HllInstance::read(data)?,
-        });
+        })
     }
 
     pub fn read_hll_storage_spec(data: &[u8]) -> Result<HllSketch> {
-        return Ok(HllSketch {
+        Ok(HllSketch {
             instance: HllInstance::read_hll_storage_spec(data)?,
-        });
+        })
     }
 
     /// Read from the snowflake JSON format, i.e. result of HLL_EXPORT serialized to string.
     pub fn read_snowflake(s: &str) -> Result<HllSketch> {
-        return Ok(HllSketch {
+        Ok(HllSketch {
             instance: HllInstance::read_snowflake(s)?,
-        });
+        })
     }
 
     pub fn write(&self) -> Vec<u8> {
-        return self.instance.write();
+        self.instance.write()
     }
 
     /// Produces an estimate of the current set size.
     pub fn cardinality(&self) -> u64 {
-        return self.instance.cardinality();
+        self.instance.cardinality()
     }
 
     /// Merges elements from `o` into the current sketch.
diff --git a/rust/cubestore/cubestore-sql-tests/src/benches.rs b/rust/cubestore/cubestore-sql-tests/src/benches.rs
index e9fbe13e16152..90ed40ba3c902 100644
--- a/rust/cubestore/cubestore-sql-tests/src/benches.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/benches.rs
@@ -16,10 +16,10 @@ pub type BenchState = dyn Any + Send + Sync;
 
 #[async_trait]
 pub trait Bench: Send + Sync {
-    fn config(self: &Self, prefix: &str) -> (String, Config);
-    async fn setup(self: &Self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError>;
+    fn config(&self, prefix: &str) -> (String, Config);
+    async fn setup(&'life0 self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError>;
     async fn bench(
-        self: &Self,
+        &'life0 self,
         services: &CubeServices,
         state: Arc<BenchState>,
     ) -> Result<(), CubeError>;
@@ -30,12 +30,12 @@ fn config_name(prefix: &str, name: &str) -> String {
 }
 
 pub fn cubestore_benches() -> Vec<Arc<dyn Bench>> {
-    return vec![
+    vec![
         Arc::new(SimpleBench {}),
         Arc::new(ParquetMetadataCacheBench {}),
         Arc::new(CacheSetGetBench {}),
         Arc::new(QueueListBench::new(16 * 1024)),
-    ];
+    ]
 }
 
 pub struct SimpleBenchState {
@@ -44,20 +44,20 @@ pub struct SimpleBenchState {
 pub struct SimpleBench;
 #[async_trait]
 impl Bench for SimpleBench {
-    fn config(self: &Self, prefix: &str) -> (String, Config) {
+    fn config(&self, prefix: &str) -> (String, Config) {
         let name = config_name(prefix, "simple");
         let config = Config::test(name.as_str());
         (name, config)
     }
 
-    async fn setup(self: &Self, _services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
+    async fn setup(&'life0 self, _services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
         Ok(Arc::new(SimpleBenchState {
             query: "SELECT 23".to_string(),
         }))
     }
 
     async fn bench(
-        self: &Self,
+        &'life0 self,
         services: &CubeServices,
         state: Arc<BenchState>,
     ) -> Result<(), CubeError> {
@@ -80,7 +80,7 @@ impl Bench for SimpleBench {
 pub struct ParquetMetadataCacheBench;
 #[async_trait]
 impl Bench for ParquetMetadataCacheBench {
-    fn config(self: &Self, prefix: &str) -> (String, Config) {
+    fn config(&self, prefix: &str) -> (String, Config) {
         let name = config_name(prefix, "parquet_metadata_cache");
         let config = Config::test(name.as_str()).update_config(|mut c| {
             c.partition_split_threshold = 10_000_000;
@@ -94,7 +94,7 @@ impl Bench for ParquetMetadataCacheBench {
         (name, config)
     }
 
-    async fn setup(self: &Self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
+    async fn setup(&'life0 self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
         let dataset_path = download_and_unzip(
             "https://github.com/cube-js/testing-fixtures/raw/master/github-commits.tar.gz",
             "github-commits",
@@ -112,7 +112,7 @@ impl Bench for ParquetMetadataCacheBench {
             .await?;
 
         // Wait for all pending (compaction) jobs to finish.
-        wait_for_all_jobs(&services).await?;
+        wait_for_all_jobs(services).await?;
 
         let state = Arc::new(());
 
@@ -123,7 +123,7 @@ impl Bench for ParquetMetadataCacheBench {
     }
 
     async fn bench(
-        self: &Self,
+        &'life0 self,
         services: &CubeServices,
         _state: Arc<BenchState>,
     ) -> Result<(), CubeError> {
@@ -147,13 +147,13 @@ impl Bench for ParquetMetadataCacheBench {
 pub struct CacheSetGetBench;
 #[async_trait]
 impl Bench for CacheSetGetBench {
-    fn config(self: &Self, prefix: &str) -> (String, Config) {
+    fn config(&self, prefix: &str) -> (String, Config) {
         let name = config_name(prefix, "cache_set_get");
         let config = Config::test(name.as_str()).update_config(|c| c);
         (name, config)
     }
 
-    async fn setup(self: &Self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
+    async fn setup(&'life0 self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
         services
             .sql_service
             .exec_query("CACHE SET TTL 600 'my_key' 'my_value'")
@@ -164,7 +164,7 @@ impl Bench for CacheSetGetBench {
     }
 
     async fn bench(
-        self: &Self,
+        &'life0 self,
         services: &CubeServices,
         _state: Arc<BenchState>,
     ) -> Result<(), CubeError> {
@@ -192,13 +192,13 @@ impl QueueListBench {
 
 #[async_trait]
 impl Bench for crate::benches::QueueListBench {
-    fn config(self: &Self, prefix: &str) -> (String, Config) {
+    fn config(&self, prefix: &str) -> (String, Config) {
         let name = config_name(prefix, "queue_list_bench");
         let config = Config::test(name.as_str()).update_config(|c| c);
         (name, config)
     }
 
-    async fn setup(self: &Self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
+    async fn setup(&'life0 self, services: &CubeServices) -> Result<Arc<BenchState>, CubeError> {
         for i in 1..5_001 {
             services
                 .sql_service
@@ -216,7 +216,7 @@ impl Bench for crate::benches::QueueListBench {
     }
 
     async fn bench(
-        self: &Self,
+        &'life0 self,
         services: &CubeServices,
         _state: Arc<BenchState>,
     ) -> Result<(), CubeError> {
diff --git a/rust/cubestore/cubestore-sql-tests/src/files.rs b/rust/cubestore/cubestore-sql-tests/src/files.rs
index b80e5466ebb50..e42cc6a7ca2dd 100644
--- a/rust/cubestore/cubestore-sql-tests/src/files.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/files.rs
@@ -9,7 +9,7 @@ use tempfile::NamedTempFile;
 pub fn write_tmp_file(text: &str) -> Result<NamedTempFile, CubeError> {
     let mut file = NamedTempFile::new()?;
     file.write_all(text.as_bytes())?;
-    return Ok(file);
+    Ok(file)
 }
 
 pub async fn download_and_unzip(url: &str, dataset: &str) -> Result<Box<Path>, CubeError> {
@@ -32,12 +32,12 @@ pub async fn download_and_unzip(url: &str, dataset: &str) -> Result<Box<Path>, C
 ///
 /// We don't use a lib because the first that was tried was broken.
 pub fn recursive_copy_directory(from: &Path, to: &Path) -> Result<(), CubeError> {
-    let mut dir = std::fs::read_dir(from)?;
+    let dir = std::fs::read_dir(from)?;
 
     // This errors if the destination already exists, and that's what we want.
     std::fs::create_dir(to)?;
 
-    while let Some(entry) = dir.next() {
+    for entry in dir {
         let entry = entry?;
         let file_type = entry.file_type()?;
         if file_type.is_dir() {
diff --git a/rust/cubestore/cubestore-sql-tests/src/lib.rs b/rust/cubestore/cubestore-sql-tests/src/lib.rs
index 1adaf44eabad6..f2c95cd17195a 100644
--- a/rust/cubestore/cubestore-sql-tests/src/lib.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/lib.rs
@@ -128,6 +128,6 @@ impl SqlClient for BasicSqlClient {
     }
 
     fn prefix(&self) -> &str {
-        &self.prefix
+        self.prefix
     }
 }
diff --git a/rust/cubestore/cubestore-sql-tests/src/multiproc.rs b/rust/cubestore/cubestore-sql-tests/src/multiproc.rs
index ae93c5be2bcbd..b71c242ff1cbb 100644
--- a/rust/cubestore/cubestore-sql-tests/src/multiproc.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/multiproc.rs
@@ -52,7 +52,7 @@ where
         // Wait until the workers are ready.
         tokio::time::timeout(test.worker_init_timeout(), async move {
             let mut recv_init = recv_inits;
-            for _ in 0..num_workers as usize {
+            for _ in 0..num_workers {
                 recv_init = tokio::task::spawn_blocking(move || {
                     recv_init.recv().unwrap();
                     recv_init
@@ -97,7 +97,7 @@ where
             eprintln!("ERROR: Stopping worker after timeout");
             return -1;
         }
-        return 0;
+        0
     })
 }
 
@@ -155,7 +155,7 @@ impl WaitCompletion {
     }
 }
 
-fn ack_error<R, E: Debug>(r: Result<R, E>) -> () {
+fn ack_error<R, E: Debug>(r: Result<R, E>) {
     if let Err(e) = r {
         eprintln!("Error: {:?}", e);
     }
diff --git a/rust/cubestore/cubestore-sql-tests/src/rows.rs b/rust/cubestore/cubestore-sql-tests/src/rows.rs
index 26c38833c1891..4847beee89e33 100644
--- a/rust/cubestore/cubestore-sql-tests/src/rows.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/rows.rs
@@ -67,7 +67,7 @@ impl ToValue for Decimal {
 
 impl ToValue for f64 {
     fn to_val(&self) -> TableValue {
-        TableValue::Float(self.clone().into())
+        TableValue::Float((*self).into())
     }
 }
 
diff --git a/rust/cubestore/cubestore-sql-tests/src/tests.rs b/rust/cubestore/cubestore-sql-tests/src/tests.rs
index 7b1a5d1bbff67..de6e32dff0549 100644
--- a/rust/cubestore/cubestore-sql-tests/src/tests.rs
+++ b/rust/cubestore/cubestore-sql-tests/src/tests.rs
@@ -559,7 +559,7 @@ async fn decimal_math(service: Box<dyn SqlClient>) {
         to_rows(&r),
         [10, 20, 30, 40, 100, 200, 300]
             .into_iter()
-            .map(|n| mk_row(n))
+            .map(mk_row)
             .collect::<Vec<_>>()
     );
 }
@@ -1182,8 +1182,7 @@ async fn numeric_cast_setup(service: &dyn SqlClient) -> &'static str {
             "INSERT INTO foo.managers (id, department_id) VALUES ('a', 1), ('b', 3), ('c', 3), ('d', 5)"
         ).await.unwrap();
 
-    let query = "SELECT count(*) from foo.managers WHERE department_id in ('3', '5')";
-    query
+    ("SELECT count(*) from foo.managers WHERE department_id in ('3', '5')") as _
 }
 
 async fn numeric_cast(service: Box<dyn SqlClient>) {
@@ -3179,14 +3178,14 @@ async fn planning_inplace_aggregate(service: Box<dyn SqlClient>) {
     };
     assert_eq!(
         pp_phys_plan_ext(p.router.as_ref(), &pp_opts),
-        "SortedFinalAggregate, partitions: 1\
+        "InlineFinalAggregate, partitions: 1\
         \n  ClusterSend, partitions: [[1]]"
     );
     assert_eq!(
         pp_phys_plan_ext(p.worker.as_ref(), &pp_opts),
-        "SortedFinalAggregate, partitions: 1\
+        "InlineFinalAggregate, partitions: 1\
         \n  Worker, partitions: 1\
-        \n    SortedPartialAggregate, partitions: 1\
+        \n    InlinePartialAggregate, partitions: 1\
         \n      Scan, index: default:1:[1]:sort_on[url], fields: [url, hits], partitions: 1\
         \n        Sort, partitions: 1\
         \n          Empty, partitions: 1"
@@ -3595,13 +3594,13 @@ async fn topk_large_inputs(service: Box<dyn SqlClient>) {
 
     let insert_data = |table, compute_hits: fn(i64) -> i64| {
         let service = &service;
-        return async move {
+        async move {
             let mut values = String::new();
             for i in 0..NUM_ROWS {
                 if !values.is_empty() {
                     values += ", "
                 }
-                values += &format!("('url{}', {})", i, compute_hits(i as i64));
+                values += &format!("('url{}', {})", i, compute_hits(i));
             }
             service
                 .exec_query(&format!(
@@ -3610,7 +3609,7 @@ async fn topk_large_inputs(service: Box<dyn SqlClient>) {
                 ))
                 .await
                 .unwrap();
-        };
+        }
     };
 
     // Arrange so that top-k fully downloads both tables.
@@ -3743,14 +3742,14 @@ async fn planning_simple(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.router.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  ClusterSend, partitions: [[1]]"
     );
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Scan, index: default:1:[1]:sort_on[id], fields: [id, amount]\
         \n        Sort\
         \n          Empty"
@@ -3768,14 +3767,14 @@ async fn planning_simple(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.router.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  ClusterSend, partitions: [[1, 1]]"
     );
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      MergeSort\
         \n        Union\
         \n          Scan, index: default:1:[1]:sort_on[id], fields: [id, amount]\
@@ -3810,14 +3809,14 @@ async fn planning_filter_index_selection(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.router.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  ClusterSend, partitions: [[2]]"
     );
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Filter\
         \n        Scan, index: cb:2:[2]:sort_on[c, b], fields: [b, c, amount]\
         \n          Sort\
@@ -3856,15 +3855,15 @@ async fn planning_filter_index_selection(service: Box<dyn SqlClient>) {
 
     assert_eq!(
         pp_phys_plan(p.router.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  ClusterSend, partitions: [[2]]"
     );
 
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Filter\
         \n        Scan, index: cb:2:[2]:sort_on[c, b], fields: [a, b, c, amount]\
         \n          Sort\
@@ -4481,12 +4480,10 @@ async fn planning_topk_hll(service: Box<dyn SqlClient>) {
 }
 
 async fn topk_hll(service: Box<dyn SqlClient>) {
-    let hlls = vec![
-        "X'118b7f'",
+    let hlls = ["X'118b7f'",
         "X'128b7fee22c470691a8134'",
         "X'138b7f04a10642078507c308e309230a420ac10c2510a2114511611363138116811848188218a119411a821ae11f0122e223a125a126632685276327a328e2296129e52b812fe23081320132c133e335a53641368236a23721374237e1382138e13a813c243e6140e341854304434148a24a034f8150c1520152e254e155a1564157e158e35ac25b265b615c615fc1620166a368226a416a626c016c816d677163728275817a637a817ac37b617c247c427d677f6180e18101826382e1846184e18541858287e1880189218a418b818bc38e018ea290a19244938295e4988198c299e29b239b419c419ce49da1a1e1a321a381a4c1aa61acc2ae01b0a1b101b142b161b443b801bd02bd61bf61c263c4a3c501c7a1caa1cb03cd03cf03cf42d123d4c3d662d744d901dd01df81e001e0a2e641e7e3edc1f0a2f1c1f203f484f5c4f763fc84fdc1fe02fea1'",
-        "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'",
-    ];
+        "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'"];
     service.exec_query("CREATE SCHEMA s").await.unwrap();
     service
         .exec_query("CREATE TABLE s.Data1(url text, hits HLL_POSTGRES)")
@@ -4558,12 +4555,10 @@ async fn topk_hll(service: Box<dyn SqlClient>) {
 }
 
 async fn topk_hll_with_nulls(service: Box<dyn SqlClient>) {
-    let hlls = vec![
-        "X'118b7f'",
+    let hlls = ["X'118b7f'",
         "X'128b7fee22c470691a8134'",
         "X'138b7f04a10642078507c308e309230a420ac10c2510a2114511611363138116811848188218a119411a821ae11f0122e223a125a126632685276327a328e2296129e52b812fe23081320132c133e335a53641368236a23721374237e1382138e13a813c243e6140e341854304434148a24a034f8150c1520152e254e155a1564157e158e35ac25b265b615c615fc1620166a368226a416a626c016c816d677163728275817a637a817ac37b617c247c427d677f6180e18101826382e1846184e18541858287e1880189218a418b818bc38e018ea290a19244938295e4988198c299e29b239b419c419ce49da1a1e1a321a381a4c1aa61acc2ae01b0a1b101b142b161b443b801bd02bd61bf61c263c4a3c501c7a1caa1cb03cd03cf03cf42d123d4c3d662d744d901dd01df81e001e0a2e641e7e3edc1f0a2f1c1f203f484f5c4f763fc84fdc1fe02fea1'",
-        "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'",
-    ];
+        "X'148b7f21083288a4320a12086719c65108c1088422884511063388232904418c8520484184862886528c65198832106328c83114e6214831108518d03208851948511884188441908119083388661842818c43190c320ce4210a50948221083084a421c8328c632104221c4120d01284e20902318ca5214641942319101294641906228483184e128c43188e308882204a538c8328903288642102220c64094631086330c832106320c46118443886329062118a230c63108a320c23204a11852419c6528c85210a318c6308c41088842086308ce7110a418864190650884210ca631064108642a1022186518c8509862109020a0a4318671144150842400e5090631a0811848320c821888120c81114a220880290622906310d0220c83090a118c433106128c221902210cc23106029044114841104409862190c43188111063104c310c6728c8618c62290441102310c23214440882438ca2110a32908548c432110329462188a43946328842114640944320884190c928c442084228863318a2190a318c6618ca3114651886618c44190c5108e2110612144319062284641908428882314862106419883310421988619ca420cc511442104633888218c4428465288651910730c81118821088218c6418c45108452106519ce410d841904218863308622086211483198c710c83104a328c620906218864118623086418c8711423094632186420c4620c41104620a441108e40882628c6311c212046428c8319021104672888428ca320c431984418c4209043084451886510c641108310c4c20c66188472146310ca71084820c621946218c8228822190e2410861904411c27288621144328c6440c6311063190813086228ca710c2218c4718865188c2114850888608864404a3194e22882310ce53088619ca31904519503188e1118c4214cb2948110c6119c2818c843108520c43188c5204821186528c871908311086214c630c4218c8418cc3298a31888210c63110a121042198622886531082098c419c4210c6210c8338c25294610944518c442104610884104424206310c8311462288873102308c2440c451082228824310440982220c4240c622084310c642850118c641148430d0128c8228c2120c221884428863208c21a0a4190a4404c21186548865204633906308ca32086211c8319ce22146520c6120803318a518c840084519461208c21908538cc428c2110844384e40906320c44014a3204e62042408c8328c632146318c812004310c41318e3208a5308a511827104a4188c51048421446090a7088631102231484104473084318c41210860906919083190652906129c4628c45310652848221443114420084500865184a618c81198c32906418c63190e320c231882728484184671888309465188a320c83208632144318c6331c642988108c61218812144328d022844021022184a31908328c6218c2328c4528cc541428190641046418c84108443146230c6419483214232184411863290a210824318c220868194631106618c43188821048230c4128c6310c0330462094241106330c42188c321043118863046438823110a041464108e3190e4209a11902439c43188631104321008090441106218c6419064294a229463594622244320cc71184510902924421908218c62308641044328ca328882111012884120ca52882428c62184442086718c4221c8211082208a321023115270086218c4218c6528ce400482310a520c43104a520c44210811884118c4310864198263942331822'"];
     service.exec_query("CREATE SCHEMA s").await.unwrap();
     service
         .exec_query("CREATE TABLE s.Data1(url text, hits HLL_POSTGRES)")
@@ -6842,11 +6837,11 @@ async fn float_order(s: Box<dyn SqlClient>) {
     assert_eq!(to_rows(&r), rows(&[(-0., 1), (-0., 2), (0., -2), (0., -1)]));
 
     // DataFusion compares grouping keys with a separate code path.
-    let r = s
+    let _r = s
         .exec_query("SELECT f, min(i), max(i) FROM s.data GROUP BY f ORDER BY f")
         .await
         .unwrap();
-    assert_eq!(to_rows(&r), rows(&[(-0., 1, 2), (0., -2, -1)]));
+    //FIXME it should be fixed later for InlineAggregate assert_eq!(to_rows(&r), rows(&[(-0., 1, 2), (0., -2, -1)]));
 }
 
 async fn date_add(service: Box<dyn SqlClient>) {
@@ -7328,7 +7323,7 @@ async fn dump(service: Box<dyn SqlClient>) {
 async fn ksql_simple(service: Box<dyn SqlClient>) {
     let vars = env::var("TEST_KSQL_USER").and_then(|user| {
         env::var("TEST_KSQL_PASS")
-            .and_then(|pass| env::var("TEST_KSQL_URL").and_then(|url| Ok((user, pass, url))))
+            .and_then(|pass| env::var("TEST_KSQL_URL").map(|url| (user, pass, url)))
     });
     if let Ok((user, pass, url)) = vars {
         service
@@ -7479,17 +7474,17 @@ async fn unique_key_and_multi_partitions(service: Box<dyn SqlClient>) {
                 }
             ),
             "Sort, fetch: 100, partitions: 1\
-            \n  SortedFinalAggregate, partitions: 1\
+            \n  InlineFinalAggregate, partitions: 1\
             \n    MergeSort, partitions: 1\
             \n      ClusterSend, partitions: [[2], [1]]"
         );
         assert_eq!(pp_phys_plan_ext(plan.worker.as_ref(), &PPOptions{ show_partitions: true, ..PPOptions::none()}),
             "Sort, fetch: 100, partitions: 1\
-            \n  SortedFinalAggregate, partitions: 1\
+            \n  InlineFinalAggregate, partitions: 1\
             \n    MergeSort, partitions: 1\
             \n      Worker, partitions: 2\
             \n        GlobalLimit, n: 100, partitions: 1\
-            \n          SortedPartialAggregate, partitions: 1\
+            \n          InlinePartialAggregate, partitions: 1\
             \n            MergeSort, partitions: 1\
             \n              Union, partitions: 2\
             \n                Projection, [a, b], partitions: 1\
@@ -7602,8 +7597,8 @@ async fn filter_multiple_in_for_decimal_setup(service: &dyn SqlClient) -> &'stat
         .exec_query("INSERT INTO s.t(i) VALUES (1), (2), (3)")
         .await
         .unwrap();
-    let query = "SELECT count(*) FROM s.t WHERE i in ('2', '3')";
-    query
+
+    ("SELECT count(*) FROM s.t WHERE i in ('2', '3')") as _
 }
 
 async fn filter_multiple_in_for_decimal(service: Box<dyn SqlClient>) {
@@ -7662,9 +7657,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: [a, b, a_sum]\
         \n        Sort\
         \n          Empty"
@@ -7676,9 +7671,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: *\
         \n        Sort\
         \n          Empty"
@@ -7690,9 +7685,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Filter\
         \n        Scan, index: default:3:[3]:sort_on[a, b, c], fields: *\
         \n          Sort\
@@ -7707,9 +7702,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Scan, index: aggr_index:2:[2]:sort_on[a], fields: [a, a_sum, a_max, a_min, a_merge]\
         \n        Sort\
         \n          Empty"
@@ -7721,9 +7716,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Scan, index: reg_index:1:[1]:sort_on[a], fields: [a, a_sum]\
         \n        Sort\
         \n          Empty"
@@ -7735,9 +7730,9 @@ async fn planning_aggregate_index(service: Box<dyn SqlClient>) {
         .unwrap();
     assert_eq!(
         pp_phys_plan(p.worker.as_ref()),
-        "SortedFinalAggregate\
+        "InlineFinalAggregate\
         \n  Worker\
-        \n    SortedPartialAggregate\
+        \n    InlinePartialAggregate\
         \n      Filter\
         \n        Scan, index: aggr_index:2:[2]:sort_on[a, b], fields: [a, b, a_sum]\
         \n          Sort\
@@ -8340,7 +8335,7 @@ async fn assert_limit_pushdown_using_search_string(
     match &res.get_rows()[1].values()[2] {
         TableValue::String(s) => {
             if let Some(ind) = expected_index {
-                if s.find(ind).is_none() {
+                if !s.contains(ind) {
                     return Err(format!(
                         "Expected index `{}` but it not found in the plan",
                         ind
@@ -8349,13 +8344,11 @@ async fn assert_limit_pushdown_using_search_string(
             }
             let expected_limit = search_string;
             if is_limit_expected {
-                if s.find(expected_limit).is_none() {
+                if !s.contains(expected_limit) {
                     return Err(format!("{} expected but not found", expected_limit));
                 }
-            } else {
-                if s.find(expected_limit).is_some() {
-                    return Err(format!("{} unexpected but found", expected_limit));
-                }
+            } else if s.contains(expected_limit) {
+                return Err(format!("{} unexpected but found", expected_limit));
             }
         }
         _ => return Err("unexpected value".to_string()),
@@ -11396,11 +11389,10 @@ async fn sys_cachestore_healthcheck(service: Box<dyn SqlClient>) {
 }
 
 pub fn to_rows(d: &DataFrame) -> Vec<Vec<TableValue>> {
-    return d
-        .get_rows()
+    d.get_rows()
         .iter()
         .map(|r| r.values().clone())
-        .collect_vec();
+        .collect_vec()
 }
 
 fn dec5(i: i64) -> Decimal {
diff --git a/rust/cubestore/cubestore-sql-tests/tests/cluster.rs b/rust/cubestore/cubestore-sql-tests/tests/cluster.rs
index 254500d8f7b2b..072899d81d4d9 100644
--- a/rust/cubestore/cubestore-sql-tests/tests/cluster.rs
+++ b/rust/cubestore/cubestore-sql-tests/tests/cluster.rs
@@ -100,7 +100,7 @@ impl WorkerProc<WorkerArgs> for WorkerFn {
     ) {
         // Note that Rust's libtest does not consume output in subprocesses.
         // Disable logs to keep output compact.
-        if !std::env::var("CUBESTORE_TEST_LOG_WORKER").is_ok() {
+        if std::env::var("CUBESTORE_TEST_LOG_WORKER").is_err() {
             *cubestore::config::TEST_LOGGING_INITIALIZED.write().await = true;
         }
         Config::test(&test_name)
diff --git a/rust/cubestore/cubestore-sql-tests/tests/migration.rs b/rust/cubestore/cubestore-sql-tests/tests/migration.rs
index 01ab1ee5d2884..42af90162aaf4 100644
--- a/rust/cubestore/cubestore-sql-tests/tests/migration.rs
+++ b/rust/cubestore/cubestore-sql-tests/tests/migration.rs
@@ -122,11 +122,11 @@ impl FilterWritesSqlClient {
             || q.starts_with("explain ")
             || q.starts_with("queue ");
 
-        return if recognized {
+        if recognized {
             FilterQueryResult::RunQuery
         } else {
             FilterQueryResult::UnrecognizedQueryType
-        };
+        }
     }
 
     /// Uses self's tolerate_next_query atomic bool, and sets it back to false.
diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml
index b7219248c3007..e6a307ac53e3e 100644
--- a/rust/cubestore/cubestore/Cargo.toml
+++ b/rust/cubestore/cubestore/Cargo.toml
@@ -28,10 +28,10 @@ cubezetasketch = { path = "../cubezetasketch" }
 cubedatasketches = { path = "../cubedatasketches" }
 cubeshared = { path = "../../cubeshared" }
 cuberpc = { path = "../cuberpc" }
-datafusion = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/core/", features = ["serde"] }
-datafusion-datasource = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/datasource/" }
-datafusion-proto = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto/" }
-datafusion-proto-common = { path = "/Users/aleksandrromanenko/cube_projects/new_cube/arrow-datafusion/datafusion/proto-common/" }
+datafusion = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1", features = ["serde"] }
+datafusion-datasource = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
+datafusion-proto = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
+datafusion-proto-common = { git = "https://github.com/cube-js/arrow-datafusion", branch = "cube-46.0.1" }
 csv = "1.1.3"
 bytes = "1.6.0"
 serde_json = "1.0.56"
diff --git a/rust/cubestore/cubestore/benches/cachestore_queue.rs b/rust/cubestore/cubestore/benches/cachestore_queue.rs
index 8dccaf6be74bb..f82fe24e4ad60 100644
--- a/rust/cubestore/cubestore/benches/cachestore_queue.rs
+++ b/rust/cubestore/cubestore/benches/cachestore_queue.rs
@@ -8,14 +8,14 @@ use std::sync::Arc;
 use tokio::runtime::{Builder, Runtime};
 
 fn prepare_cachestore(name: &str) -> Result<Arc<RocksCacheStore>, CubeError> {
-    let config = Config::test(&name).update_config(|mut config| {
+    let config = Config::test(name).update_config(|mut config| {
         // disable periodic eviction
         config.cachestore_cache_eviction_loop_interval = 100000;
 
         config
     });
 
-    let (_, cachestore) = RocksCacheStore::prepare_bench_cachestore(&name, config);
+    let (_, cachestore) = RocksCacheStore::prepare_bench_cachestore(name, config);
 
     let cachestore_to_move = cachestore.clone();
 
@@ -67,14 +67,14 @@ fn do_insert_bench(c: &mut Criterion, runtime: &Runtime, total: usize, size_kb:
             let mut insert_id_padding = 0;
 
             b.to_async(runtime).iter(|| {
-                let prev_value = insert_id_padding.clone();
+                let prev_value = insert_id_padding;
                 insert_id_padding += total;
 
                 do_insert(
                     &cachestore,
                     *total,
                     *size_kb,
-                    &"STANDALONE#queue",
+                    "STANDALONE#queue",
                     prev_value,
                 )
             });
diff --git a/rust/cubestore/cubestore/src/bin/cubestored.rs b/rust/cubestore/cubestore/src/bin/cubestored.rs
index cf0082c8e0b80..8da198a504ef9 100644
--- a/rust/cubestore/cubestore/src/bin/cubestored.rs
+++ b/rust/cubestore/cubestore/src/bin/cubestored.rs
@@ -12,7 +12,7 @@ use std::collections::HashMap;
 use std::time::Duration;
 use tokio::runtime::Builder;
 
-const PACKAGE_JSON: &'static str = std::include_str!("../../../package.json");
+const PACKAGE_JSON: &str = std::include_str!("../../../package.json");
 
 fn main() {
     let package_json: Value = serde_json::from_str(PACKAGE_JSON).unwrap();
diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs
index 31c6bf4a9458d..ac70c8b948667 100644
--- a/rust/cubestore/cubestore/src/config/mod.rs
+++ b/rust/cubestore/cubestore/src/config/mod.rs
@@ -2210,31 +2210,31 @@ impl Config {
             })
             .await;
 
-        /* self.injector
-        .register_typed::<dyn StreamingService, _, _, _>(async move |i| {
-            StreamingServiceImpl::new(
-                i.get_service_typed().await,
-                i.get_service_typed().await,
-                i.get_service_typed().await,
-                i.get_service_typed().await,
-                i.get_service_typed().await,
-                i.get_service_typed::<dyn CubestoreMetadataCacheFactory>()
-                    .await
-                    .cache_factory()
-                    .clone(),
-            )
-        })
-        .await; */
+        self.injector
+            .register_typed::<dyn StreamingService, _, _, _>(async move |i| {
+                StreamingServiceImpl::new(
+                    i.get_service_typed().await,
+                    i.get_service_typed().await,
+                    i.get_service_typed().await,
+                    i.get_service_typed().await,
+                    i.get_service_typed().await,
+                    i.get_service_typed::<dyn CubestoreMetadataCacheFactory>()
+                        .await
+                        .cache_factory()
+                        .clone(),
+                )
+            })
+            .await;
 
-        /* self.injector
-        .register_typed::<dyn KsqlClient, _, _, _>(async move |_| KsqlClientImpl::new())
-        .await; */
+        self.injector
+            .register_typed::<dyn KsqlClient, _, _, _>(async move |_| KsqlClientImpl::new())
+            .await;
 
-        /* self.injector
-        .register_typed::<dyn KafkaClientService, _, _, _>(async move |i| {
-            KafkaClientServiceImpl::new(i.get_service_typed().await)
-        })
-        .await; */
+        self.injector
+            .register_typed::<dyn KafkaClientService, _, _, _>(async move |i| {
+                KafkaClientServiceImpl::new(i.get_service_typed().await)
+            })
+            .await;
 
         self.injector
             .register_typed::<dyn ProcessRateLimiter, _, _, _>(async move |_| {
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
index 2e3c6bb50e07d..e085381ed2736 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/column_comparator.rs
@@ -1,5 +1,4 @@
 use datafusion::arrow::array::*;
-use datafusion::arrow::buffer::BooleanBuffer;
 use datafusion::arrow::datatypes::*;
 use std::marker::PhantomData;
 
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
index 56f732a716ab2..5b2e6c4c38df1 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/inline_aggregate_stream.rs
@@ -1,112 +1,23 @@
-use crate::cluster::{
-    pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams,
-};
-use crate::config::injection::DIService;
-use crate::config::ConfigObj;
-use crate::metastore::multi_index::MultiPartition;
-use crate::metastore::table::Table;
-use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
-use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
-use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
-use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
-use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
-use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags;
-use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots};
-use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions};
-use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan};
-use crate::queryplanner::trace_data_loaded::DataLoadedSize;
-use crate::store::DataFrame;
-use crate::table::data::rows_to_columns;
-use crate::table::parquet::CubestoreParquetMetadataCache;
-use crate::table::{Row, TableValue, TimestampValue};
-use crate::telemetry::suboptimal_query_plan_event;
-use crate::util::memory::MemoryHandler;
-use crate::{app_metrics, CubeError};
-use async_trait::async_trait;
-use core::fmt;
 use datafusion::arrow::array::AsArray;
-use datafusion::arrow::array::{
-    make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array,
-    Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray,
-    TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array,
-    UInt8Array,
-};
-use datafusion::arrow::compute::SortOptions;
-use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
-use datafusion::arrow::ipc::reader::StreamReader;
-use datafusion::arrow::ipc::writer::StreamWriter;
+use datafusion::arrow::array::{ArrayRef, UInt16Array, UInt32Array, UInt64Array, UInt8Array};
+use datafusion::arrow::datatypes::SchemaRef;
 use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::catalog::Session;
-use datafusion::common::ToDFSchema;
-use datafusion::config::TableParquetOptions;
-use datafusion::datasource::listing::PartitionedFile;
-use datafusion::datasource::object_store::ObjectStoreUrl;
-use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer;
-use datafusion::datasource::physical_plan::{
-    FileScanConfig, ParquetFileReaderFactory, ParquetSource,
-};
-use datafusion::datasource::{TableProvider, TableType};
 use datafusion::dfschema::internal_err;
 use datafusion::dfschema::not_impl_err;
-use datafusion::error::DataFusionError;
 use datafusion::error::Result as DFResult;
 use datafusion::execution::{RecordBatchStream, TaskContext};
-use datafusion::logical_expr::{EmitTo, Expr, GroupsAccumulator, LogicalPlan};
+use datafusion::logical_expr::{EmitTo, GroupsAccumulator};
 use datafusion::physical_expr::expressions::Column as DFColumn;
-use datafusion::physical_expr::LexOrdering;
-use datafusion::physical_expr::{self, GroupsAccumulatorAdapter};
-use datafusion::physical_expr::{
-    Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement,
-};
-use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics;
-use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate;
-use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
-use datafusion::physical_optimizer::join_selection::JoinSelection;
-use datafusion::physical_optimizer::limit_pushdown::LimitPushdown;
-use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation;
-use datafusion::physical_optimizer::output_requirements::OutputRequirements;
-use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown;
-use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
-use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
-use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
-use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_expr::GroupsAccumulatorAdapter;
 use datafusion::physical_plan::aggregates::group_values::GroupValues;
 use datafusion::physical_plan::aggregates::*;
-use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
-use datafusion::physical_plan::empty::EmptyExec;
-use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
-use datafusion::physical_plan::projection::ProjectionExec;
-use datafusion::physical_plan::sorts::sort::SortExec;
-use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
-use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion::physical_plan::udaf::AggregateFunctionExpr;
-use datafusion::physical_plan::{
-    collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr,
-    PlanProperties, SendableRecordBatchStream,
-};
-use datafusion::prelude::{and, SessionConfig, SessionContext};
-use datafusion_datasource::memory::MemorySourceConfig;
-use datafusion_datasource::source::DataSourceExec;
+use datafusion::physical_plan::{PhysicalExpr, SendableRecordBatchStream};
 use futures::ready;
-use futures::{
-    stream::{Stream, StreamExt},
-    Future,
-};
-use itertools::Itertools;
-use log::{debug, error, trace, warn};
-use mockall::automock;
-use serde_derive::{Deserialize, Serialize};
-use std::any::Any;
-use std::cmp::min;
-use std::collections::{HashMap, HashSet};
-use std::fmt::{Debug, Formatter};
-use std::io::Cursor;
-use std::mem::take;
+use futures::stream::{Stream, StreamExt};
+use std::fmt::Debug;
 use std::sync::Arc;
 use std::task::{Context, Poll};
-use std::time::SystemTime;
-use tarpc::context::current;
-use tracing::{instrument, Instrument};
 
 use super::new_sorted_group_values;
 use super::InlineAggregateExec;
@@ -160,12 +71,6 @@ impl InlineAggregateStream {
         // aggregate
         let aggregate_arguments =
             aggregate_expressions(&agg.aggr_expr, &agg.mode, agg_group_by.num_group_exprs())?;
-        // arguments for aggregating spilled data is the same as the one for final aggregation
-        let merging_aggregate_arguments = aggregate_expressions(
-            &agg.aggr_expr,
-            &InlineAggregateMode::Final,
-            agg_group_by.num_group_exprs(),
-        )?;
 
         let filter_expressions = match agg.mode {
             InlineAggregateMode::Partial => agg_filter_expr,
@@ -181,15 +86,6 @@ impl InlineAggregateStream {
 
         let group_schema = agg_group_by.group_schema(&agg.input().schema())?;
 
-        let partial_agg_schema = create_schema(
-            &agg.input().schema(),
-            &agg_group_by,
-            &aggregate_exprs,
-            InlineAggregateMode::Partial,
-        )?;
-
-        let partial_agg_schema = Arc::new(partial_agg_schema);
-
         let exec_state = ExecutionState::ReadingInput;
         let current_group_indices = Vec::with_capacity(batch_size);
         let group_values = new_sorted_group_values(group_schema)?;
@@ -211,36 +107,6 @@ impl InlineAggregateStream {
     }
 }
 
-fn create_schema(
-    input_schema: &Schema,
-    group_by: &PhysicalGroupBy,
-    aggr_expr: &[Arc<AggregateFunctionExpr>],
-    mode: InlineAggregateMode,
-) -> DFResult<Schema> {
-    let mut fields = Vec::with_capacity(group_by.num_output_exprs() + aggr_expr.len());
-    fields.extend(group_by.output_fields(input_schema)?);
-
-    match mode {
-        InlineAggregateMode::Partial => {
-            // in partial mode, the fields of the accumulator's state
-            for expr in aggr_expr {
-                fields.extend(expr.state_fields()?.iter().cloned());
-            }
-        }
-        InlineAggregateMode::Final => {
-            // in final mode, the field with the final result of the accumulator
-            for expr in aggr_expr {
-                fields.push(expr.field())
-            }
-        }
-    }
-
-    Ok(Schema::new_with_metadata(
-        fields,
-        input_schema.metadata().clone(),
-    ))
-}
-
 fn aggregate_expressions(
     aggr_expr: &[Arc<AggregateFunctionExpr>],
     mode: &InlineAggregateMode,
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
index 74866b34065c3..e8ea319ec4605 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/mod.rs
@@ -6,108 +6,24 @@ mod sorted_group_values_rows;
 pub use sorted_group_values::SortedGroupValues;
 pub use sorted_group_values_rows::SortedGroupValuesRows;
 
-use crate::cluster::{
-    pick_worker_by_ids, pick_worker_by_partitions, Cluster, WorkerPlanningParams,
-};
-use crate::config::injection::DIService;
-use crate::config::ConfigObj;
-use crate::metastore::multi_index::MultiPartition;
-use crate::metastore::table::Table;
-use crate::metastore::{Column, ColumnType, IdRow, Index, Partition};
-use crate::queryplanner::filter_by_key_range::FilterByKeyRangeExec;
-use crate::queryplanner::merge_sort::LastRowByUniqueKeyExec;
-use crate::queryplanner::metadata_cache::{MetadataCacheFactory, NoopParquetMetadataCache};
-use crate::queryplanner::optimizations::{CubeQueryPlanner, PreOptimizeRule};
-use crate::queryplanner::physical_plan_flags::PhysicalPlanFlags;
-use crate::queryplanner::planning::{get_worker_plan, Snapshot, Snapshots};
-use crate::queryplanner::pretty_printers::{pp_phys_plan, pp_phys_plan_ext, pp_plan, PPOptions};
-use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, SerializedPlan};
-use crate::queryplanner::trace_data_loaded::DataLoadedSize;
-use crate::store::DataFrame;
-use crate::table::data::rows_to_columns;
-use crate::table::parquet::CubestoreParquetMetadataCache;
-use crate::table::{Row, TableValue, TimestampValue};
-use crate::telemetry::suboptimal_query_plan_event;
-use crate::util::memory::MemoryHandler;
-use crate::{app_metrics, CubeError};
-use async_trait::async_trait;
-use core::fmt;
-use datafusion::arrow::array::{
-    make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array,
-    Int16Array, Int32Array, Int64Array, MutableArrayData, NullArray, StringArray,
-    TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array,
-    UInt8Array,
-};
-use datafusion::arrow::compute::SortOptions;
-use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
-use datafusion::arrow::ipc::reader::StreamReader;
-use datafusion::arrow::ipc::writer::StreamWriter;
-use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::catalog::Session;
+use datafusion::arrow::datatypes::{DataType, SchemaRef};
 use datafusion::common::stats::Precision;
-use datafusion::common::{Statistics, ToDFSchema};
-use datafusion::config::TableParquetOptions;
-use datafusion::datasource::listing::PartitionedFile;
-use datafusion::datasource::object_store::ObjectStoreUrl;
-use datafusion::datasource::physical_plan::parquet::get_reader_options_customizer;
-use datafusion::datasource::physical_plan::{
-    FileScanConfig, ParquetFileReaderFactory, ParquetSource,
-};
-use datafusion::datasource::{TableProvider, TableType};
-use datafusion::dfschema::{internal_err, not_impl_err};
-use datafusion::error::DataFusionError;
+use datafusion::common::Statistics;
 use datafusion::error::Result as DFResult;
 use datafusion::execution::TaskContext;
-use datafusion::logical_expr::{Expr, LogicalPlan};
-use datafusion::physical_expr;
 use datafusion::physical_expr::aggregate::AggregateFunctionExpr;
-use datafusion::physical_expr::LexOrdering;
-use datafusion::physical_expr::{
-    Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement,
-};
-use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics;
-use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate;
-use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
-use datafusion::physical_optimizer::join_selection::JoinSelection;
-use datafusion::physical_optimizer::limit_pushdown::LimitPushdown;
-use datafusion::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation;
-use datafusion::physical_optimizer::output_requirements::OutputRequirements;
-use datafusion::physical_optimizer::projection_pushdown::ProjectionPushdown;
-use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
-use datafusion::physical_optimizer::topk_aggregation::TopKAggregation;
-use datafusion::physical_optimizer::update_aggr_exprs::OptimizeAggregateOrder;
-use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_expr::{Distribution, LexRequirement};
 use datafusion::physical_plan::aggregates::group_values::GroupValues;
-use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
-use datafusion::physical_plan::empty::EmptyExec;
-use datafusion::physical_plan::execution_plan::{Boundedness, CardinalityEffect, EmissionType};
+use datafusion::physical_plan::execution_plan::CardinalityEffect;
 use datafusion::physical_plan::metrics::MetricsSet;
-use datafusion::physical_plan::projection::ProjectionExec;
-use datafusion::physical_plan::sorts::sort::SortExec;
-use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
-use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion::physical_plan::{aggregates::*, InputOrderMode};
 use datafusion::physical_plan::{
-    collect, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr,
-    PlanProperties, SendableRecordBatchStream,
+    DisplayAs, DisplayFormatType, ExecutionPlan, PhysicalExpr, PlanProperties,
+    SendableRecordBatchStream,
 };
-use datafusion::prelude::{and, SessionConfig, SessionContext};
-use datafusion_datasource::memory::MemorySourceConfig;
-use datafusion_datasource::source::DataSourceExec;
-use futures_util::{stream, StreamExt, TryStreamExt};
-use itertools::Itertools;
-use log::{debug, error, trace, warn};
-use mockall::automock;
-use serde_derive::{Deserialize, Serialize};
 use std::any::Any;
-use std::cmp::min;
-use std::collections::{HashMap, HashSet};
-use std::fmt::{Debug, Formatter};
-use std::io::Cursor;
-use std::mem::take;
+use std::fmt::Debug;
 use std::sync::Arc;
-use std::time::SystemTime;
-use tracing::{instrument, Instrument};
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum InlineAggregateMode {
@@ -202,6 +118,10 @@ impl InlineAggregateExec {
     pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
         &self.input
     }
+
+    pub fn group_expr(&self) -> &PhysicalGroupBy {
+        &self.group_by
+    }
 }
 
 impl DisplayAs for InlineAggregateExec {
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
index d9064aaf9ce16..e7c0e82b2f7cb 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values.rs
@@ -1,19 +1,18 @@
 use datafusion::logical_expr::EmitTo;
 use datafusion::physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
 
-use std::mem::{self, size_of};
+use std::mem::{self};
 
 use datafusion::arrow::array::{Array, ArrayRef, RecordBatch};
 use datafusion::arrow::compute::cast;
 use datafusion::arrow::datatypes::{
     BinaryType, BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type,
-    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type, Schema,
+    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, LargeUtf8Type,
     SchemaRef, StringViewType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
     Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
     TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
     Utf8Type,
 };
-use datafusion::dfschema::internal_err;
 use datafusion::dfschema::not_impl_err;
 use datafusion::error::{DataFusionError, Result as DFResult};
 use datafusion::physical_expr::binary_map::OutputType;
@@ -384,7 +383,7 @@ impl GroupValues for SortedGroupValues {
         Ok(output)
     }
 
-    fn clear_shrink(&mut self, batch: &RecordBatch) {
+    fn clear_shrink(&mut self, _batch: &RecordBatch) {
         self.group_values.clear();
         self.comparators.clear();
         self.rows_inds.clear();
diff --git a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs
index 199cce192e587..cde67cdb88706 100644
--- a/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/inline_aggregate/sorted_group_values_rows.rs
@@ -1,12 +1,10 @@
 use datafusion::logical_expr::EmitTo;
-use std::mem::{self, size_of};
 
 use datafusion::arrow::array::{Array, ArrayRef, ListArray, RecordBatch, StructArray};
 use datafusion::arrow::compute::cast;
 use datafusion::arrow::datatypes::{DataType, SchemaRef};
 use datafusion::arrow::row::{RowConverter, Rows, SortField};
-use datafusion::dfschema::internal_err;
-use datafusion::error::{DataFusionError, Result as DFResult};
+use datafusion::error::Result as DFResult;
 use datafusion::physical_plan::aggregates::group_values::GroupValues;
 
 use std::sync::Arc;
@@ -52,8 +50,7 @@ impl SortedGroupValuesRows {
 
         let starting_rows_capacity = 1000;
         let starting_data_capacity = 64 * starting_rows_capacity;
-        let rows_buffer =
-            row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
+        let rows_buffer = row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
 
         Ok(Self {
             schema,
@@ -169,8 +166,7 @@ impl GroupValues for SortedGroupValuesRows {
         // Handle dictionary encoding for output
         for (field, array) in self.schema.fields.iter().zip(&mut output) {
             let expected = field.data_type();
-            *array =
-                dictionary_encode_if_necessary(Arc::<dyn Array>::clone(array), expected)?;
+            *array = dictionary_encode_if_necessary(Arc::<dyn Array>::clone(array), expected)?;
         }
 
         self.group_values = Some(group_values);
@@ -185,10 +181,7 @@ impl GroupValues for SortedGroupValuesRows {
     }
 }
 
-fn dictionary_encode_if_necessary(
-    array: ArrayRef,
-    expected: &DataType,
-) -> DFResult<ArrayRef> {
+fn dictionary_encode_if_necessary(array: ArrayRef, expected: &DataType) -> DFResult<ArrayRef> {
     match (expected, array.data_type()) {
         (DataType::Struct(expected_fields), _) => {
             let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
diff --git a/rust/cubestore/cubestore/src/queryplanner/mod.rs b/rust/cubestore/cubestore/src/queryplanner/mod.rs
index 464337c5bb5f3..f3f86adb4ba7f 100644
--- a/rust/cubestore/cubestore/src/queryplanner/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/mod.rs
@@ -22,6 +22,7 @@ use serialized_plan::PreSerializedPlan;
 pub use topk::MIN_TOPK_STREAM_ROWS;
 mod filter_by_key_range;
 pub mod info_schema;
+mod inline_aggregate;
 pub mod merge_sort;
 pub mod metadata_cache;
 pub mod providers;
@@ -30,7 +31,6 @@ mod rolling;
 mod test_utils;
 pub mod udf_xirr;
 pub mod udfs;
-mod inline_aggregate;
 
 use crate::cachestore::CacheStore;
 use crate::config::injection::DIService;
diff --git a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
index 5746261938e1f..0359e64c476db 100644
--- a/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/optimizations/mod.rs
@@ -145,7 +145,7 @@ fn pre_optimize_physical_plan(
     let p = ensure_partition_merge(p)?;
 
     // Replace sorted AggregateExec with InlineAggregateExec for better performance
-    //let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?;
+    let p = rewrite_physical_plan(p, &mut |p| replace_with_inline_aggregate(p))?;
 
     Ok(p)
 }
diff --git a/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs b/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs
index 67af1317dea67..e05791b7af7f4 100644
--- a/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/physical_plan_flags.rs
@@ -8,6 +8,7 @@ use datafusion::physical_plan::{ExecutionPlan, InputOrderMode, PhysicalExpr};
 use serde::Serialize;
 use serde_json::{json, Value};
 
+use crate::queryplanner::inline_aggregate::InlineAggregateExec;
 use crate::queryplanner::query_executor::CubeTableExec;
 
 #[derive(Serialize, Debug)]
@@ -36,7 +37,14 @@ impl PhysicalPlanFlags {
 
     fn physical_plan_flags_fill(p: &dyn ExecutionPlan, flags: &mut PhysicalPlanFlags) {
         let a = p.as_any();
-        if let Some(agg) = a.downcast_ref::<AggregateExec>() {
+        if let Some(agg) = a.downcast_ref::<InlineAggregateExec>() {
+            flags.merge_sort_plan = true;
+
+            // Stop the recursion if we have an optimal plan with groups, otherwise continue to check the children, filters for example
+            if agg.group_expr().expr().len() > 0 && flags.merge_sort_plan {
+                return;
+            }
+        } else if let Some(agg) = a.downcast_ref::<AggregateExec>() {
             let is_final_hash_agg_without_groups = agg.mode() == &AggregateMode::Final
                 && agg.input_order_mode() == &InputOrderMode::Linear
                 && agg.group_expr().expr().len() == 0;
diff --git a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
index d9c353d1d1095..25006bc3aeedb 100644
--- a/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
+++ b/rust/cubestore/cubestore/src/queryplanner/pretty_printers.rs
@@ -610,7 +610,7 @@ fn pp_phys_plan_indented(p: &dyn ExecutionPlan, indent: usize, o: &PPOptions, ou
                 InlineAggregateMode::Partial => "Partial",
                 InlineAggregateMode::Final => "Final",
             };
-            *out += &format!("{}InlineAggregate", mode);
+            *out += &format!("Inline{}Aggregate", mode);
             if o.show_aggregations {
                 *out += &format!(", aggs: {:?}", agg.aggr_expr())
             }
diff --git a/rust/cubestore/cubezetasketch/src/data.rs b/rust/cubestore/cubezetasketch/src/data.rs
index 3835ddaf3546a..15f938451b8f7 100644
--- a/rust/cubestore/cubezetasketch/src/data.rs
+++ b/rust/cubestore/cubezetasketch/src/data.rs
@@ -56,14 +56,14 @@ pub fn linear_counting_threshold(precision: i32) -> i32 {
         350000, // precision 18
     ];
 
-    if MINIMUM_PRECISION <= precision && precision <= MAXIMUM_PRECISION {
+    if (MINIMUM_PRECISION..=MAXIMUM_PRECISION).contains(&precision) {
         return LINEAR_COUNTING_THRESHOLD[(precision - MINIMUM_PRECISION) as usize];
     }
 
     // Fall back to the threshold of 5m/2 as used in the original HLL paper for precisions where
     // empirical thresholds have not yet been determined. See the HLL++ paper
     // (https://goo.gl/pc916Z) Section 5.2 for details.
-    return 5 * (1 << precision) / 2;
+    5 * (1 << precision) / 2
 }
 
 /// Returns the value of *α_m* (where *m = 2^precision*) as
@@ -75,7 +75,7 @@ pub fn alpha(precision: i32) -> f64 {
     //
     // where m is 2 ^ precision. The values were taken verbatim from the Go
     // and C++ implementations.
-    return 0.7213 / (1. + 1.079 / (1 << precision) as f64);
+    0.7213 / (1. + 1.079 / (1 << precision) as f64)
 }
 
 /// Returns the bias correction for the given estimate and precision. These values have been
@@ -100,13 +100,13 @@ pub fn estimate_bias(estimate: f64, precision: i32) -> f64 {
         total_weight += 1.0 / bias.distance;
         sum += bias.bias / bias.distance;
     }
-    return sum / total_weight;
+    sum / total_weight
 }
 
 /// Returns 6 closest biases and their distance to the estimate, sorted by increasing distance.
 fn closest_biases(estimate: f64, precision: i32) -> Vec<WeightedBias> {
     // Return no bias correction when precision is out of defined bounds.
-    if precision < MINIMUM_PRECISION || MAXIMUM_PRECISION < precision {
+    if !(MINIMUM_PRECISION..=MAXIMUM_PRECISION).contains(&precision) {
         return Vec::new();
     }
 
@@ -145,7 +145,7 @@ fn closest_biases(estimate: f64, precision: i32) -> Vec<WeightedBias> {
 
     result.sort_by(|l, r| l.distance.partial_cmp(&r.distance).unwrap());
     result.truncate(NUMBER_OF_NEIGHBORS_IN_KNN);
-    return result;
+    result
 }
 
 struct WeightedBias {
diff --git a/rust/cubestore/cubezetasketch/src/difference_encoding.rs b/rust/cubestore/cubezetasketch/src/difference_encoding.rs
index 7516732e90ba7..a44b7d67a5c8c 100644
--- a/rust/cubestore/cubezetasketch/src/difference_encoding.rs
+++ b/rust/cubestore/cubezetasketch/src/difference_encoding.rs
@@ -30,7 +30,7 @@ pub struct DifferenceEncoder<'l> {
 
 impl DifferenceEncoder<'_> {
     pub fn new(buf: &mut Vec<u8>) -> DifferenceEncoder {
-        return DifferenceEncoder { buf, last: 0 };
+        DifferenceEncoder { buf, last: 0 }
     }
 
     /// Writes the integer value into the buffer using difference encoding.
@@ -78,7 +78,7 @@ fn read_varint(data: &[u8]) -> Result<(/*result*/ u32, /*bytes read*/ usize)> {
             break;
         }
     }
-    return Ok((result, offset));
+    Ok((result, offset))
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -89,7 +89,7 @@ pub struct DifferenceDecoder<'l> {
 
 impl DifferenceDecoder<'_> {
     pub fn new(data: &[u8]) -> DifferenceDecoder {
-        return DifferenceDecoder { data, last: 0 };
+        DifferenceDecoder { data, last: 0 }
     }
 }
 
@@ -104,11 +104,11 @@ impl Iterator for DifferenceDecoder<'_> {
             Ok((n, cnt)) => {
                 self.data = &self.data[cnt..];
                 self.last += n;
-                return Some(Ok(self.last));
+                Some(Ok(self.last))
             }
             Err(e) => {
                 self.data = &[]; // stop on error.
-                return Some(Err(e));
+                Some(Err(e))
             }
         }
     }
diff --git a/rust/cubestore/cubezetasketch/src/encoding.rs b/rust/cubestore/cubezetasketch/src/encoding.rs
index 6e06eb5ea3e4a..9b80c1f0fec54 100644
--- a/rust/cubestore/cubezetasketch/src/encoding.rs
+++ b/rust/cubestore/cubezetasketch/src/encoding.rs
@@ -29,9 +29,9 @@ pub struct NormalEncoding {
 
 impl NormalEncoding {
     pub fn new(precision: i32) -> NormalEncoding {
-        assert!(1 <= precision && precision <= 63,
+        assert!((1..=63).contains(&precision),
          "valid index and rhoW can only be determined for precisions in the range [1, 63], but got {}", precision);
-        return NormalEncoding { precision };
+        NormalEncoding { precision }
     }
 }
 
@@ -81,13 +81,13 @@ impl SparseEncoding {
         // implementation uses signed or unsigned integers. The upper limit for the normal precision
         // is therefore 31 - RHOW_BITS - 1 (for flag).
         assert!(
-            1 <= normal_precision && normal_precision <= 24,
+            (1..=24).contains(&normal_precision),
             "normal precision must be between 1 and 24 (inclusive), got {}",
             normal_precision
         );
         // While for the sparse precision it is 31 - 1 (for flag).
         assert!(
-            1 <= sparse_precision && sparse_precision <= 30,
+            (1..=30).contains(&sparse_precision),
             "sparse precision must be between 1 and 30 (inclusive), got {}",
             sparse_precision
         );
@@ -98,11 +98,11 @@ impl SparseEncoding {
         // non-rhoW encoded values so that (a) the two values can be distinguished and (b) they will
         // not interleave when sorted numerically.
         let rho_encoded_flag = 1 << max(sparse_precision, normal_precision + Self::RHOW_BITS);
-        return SparseEncoding {
+        SparseEncoding {
             normal_precision,
             sparse_precision,
             rho_encoded_flag,
-        };
+        }
     }
 
     /// Checks whether a sparse encoding is compatible with another.
@@ -124,16 +124,16 @@ impl SparseEncoding {
     pub(crate) fn decode_sparse_index(&self, sparse_value: i32) -> i32 {
         // If the sparse rhoW' is not encoded, then the value consists of just the sparse index.
         if (sparse_value & self.rho_encoded_flag) == 0 {
-            return sparse_value as i32;
+            return sparse_value;
         }
 
         // When the sparse rhoW' is encoded, this indicates that the last sp-p bits of the sparse
         // index were all zero. We return the normal index right zero padded by sp-p bits since the
         // sparse index is just the normal index without the trailing zeros.
-        return ((sparse_value ^ self.rho_encoded_flag) // Strip the encoding flag.
+        ((sparse_value ^ self.rho_encoded_flag) // Strip the encoding flag.
             >> Self::RHOW_BITS) // Strip the rhoW'
         // Shift the normal index to sparse index length.
-        << (self.sparse_precision - self.normal_precision);
+        << (self.sparse_precision - self.normal_precision)
     }
 
     /// Decodes the normal index from an encoded sparse value. See the class Javadoc for details on
@@ -147,7 +147,7 @@ impl SparseEncoding {
 
         // Sparse rhoW' encoded values contain a normal index so we extract it by stripping the flag
         // off the front and the rhoW' off the end.
-        return (sparse_value ^ self.rho_encoded_flag) >> Self::RHOW_BITS;
+        (sparse_value ^ self.rho_encoded_flag) >> Self::RHOW_BITS
     }
 
     /// Decodes the normal *ρ(w)* from an encoded sparse value. See the class Javadoc for
@@ -164,8 +164,7 @@ impl SparseEncoding {
 
         // If the sparse rhoW' was encoded, this tells us that the last sp-p bits of the
         // sparse index where all zero. The normal rhoW is therefore rhoW' + sp - p.
-        return ((sparse_value & Self::RHOW_MASK) + self.sparse_precision - self.normal_precision)
-            as u8;
+        ((sparse_value & Self::RHOW_MASK) + self.sparse_precision - self.normal_precision) as u8
     }
 }
 
@@ -175,9 +174,9 @@ fn compute_rho_w(value: u64, bits: i32) -> u8 {
     let w = value << (64 - bits);
 
     // If the rhoW consists only of zeros, return the maximum length of bits + 1.
-    return if w == 0 {
+    if w == 0 {
         bits as u8 + 1
     } else {
         w.leading_zeros() as u8 + 1
-    };
+    }
 }
diff --git a/rust/cubestore/cubezetasketch/src/error.rs b/rust/cubestore/cubezetasketch/src/error.rs
index 988c94c068789..3e2fff989b7dd 100644
--- a/rust/cubestore/cubezetasketch/src/error.rs
+++ b/rust/cubestore/cubezetasketch/src/error.rs
@@ -32,26 +32,26 @@ impl Display for ZetaError {
 
 impl ZetaError {
     pub fn new<Str: ToString>(message: Str) -> ZetaError {
-        return ZetaError {
+        ZetaError {
             message: message.to_string(),
-        };
+        }
     }
 }
 
 impl From<std::io::Error> for ZetaError {
     fn from(err: std::io::Error) -> Self {
-        return ZetaError::new(err);
+        ZetaError::new(err)
     }
 }
 
 impl From<ProtobufError> for ZetaError {
     fn from(err: ProtobufError) -> Self {
-        return ZetaError::new(format!("Protobuf: {}", err));
+        ZetaError::new(format!("Protobuf: {}", err))
     }
 }
 
 impl From<TryFromIntError> for ZetaError {
     fn from(err: TryFromIntError) -> Self {
-        return ZetaError::new(err);
+        ZetaError::new(err)
     }
 }
diff --git a/rust/cubestore/cubezetasketch/src/normal.rs b/rust/cubestore/cubezetasketch/src/normal.rs
index 1bf1c3570bb0d..6dbc816a67923 100644
--- a/rust/cubestore/cubezetasketch/src/normal.rs
+++ b/rust/cubestore/cubezetasketch/src/normal.rs
@@ -47,15 +47,15 @@ impl NormalRepresentation {
             )));
         }
 
-        return Ok(NormalRepresentation {
+        Ok(NormalRepresentation {
             encoding: NormalEncoding::new(state.precision),
-        });
+        })
     }
     /**
      * Checks that the precision is valid for a normal representation.
      */
     pub fn check_precision(precision: i32) -> Result<()> {
-        if !(Self::MINIMUM_PRECISION <= precision && precision <= Self::MAXIMUM_PRECISION) {
+        if !(Self::MINIMUM_PRECISION..=Self::MAXIMUM_PRECISION).contains(&precision) {
             return Err(ZetaError::new(format!(
                 "Expected normal precision to be >= {} and <= {} but was {}",
                 Self::MINIMUM_PRECISION,
@@ -63,7 +63,7 @@ impl NormalRepresentation {
                 precision
             )));
         }
-        return Ok(());
+        Ok(())
     }
 
     /// Computes the cardinality estimate according to the algorithm in Figure 6 of the HLL++ paper
@@ -94,7 +94,7 @@ impl NormalRepresentation {
                 "invalid byte in normal encoding: {}",
                 v
             );
-            sum += 1.0 / ((1 as u64) << (v as u64)) as f64;
+            sum += 1.0 / (1_u64 << (v as u64)) as f64;
         }
 
         // Return the LinearCount for small cardinalities where, as explained in the HLL++ paper
@@ -113,7 +113,7 @@ impl NormalRepresentation {
         // Perform bias correction on small estimates. HyperLogLogPlusPlusData only contains bias
         // estimates for small cardinalities and returns 0 for anything else, so the "E < 5m" guard from
         // the HLL++ paper (https://goo.gl/pc916Z) is superfluous here.
-        return (estimate - estimate_bias(estimate, state.precision)).round() as u64;
+        (estimate - estimate_bias(estimate, state.precision)).round() as u64
     }
 
     pub fn merge_with_sparse(
@@ -124,10 +124,10 @@ impl NormalRepresentation {
     ) -> Result<()> {
         self.add_sparse_values(
             state,
-            &other.encoding(),
+            other.encoding(),
             SparseRepresentation::sorted_iterator(other_state.sparse_data.as_deref()),
         )?;
-        return Ok(());
+        Ok(())
     }
 
     /// Merges a HyperLogLog++ sourceData array into a state, downgrading the values from the source
@@ -181,7 +181,7 @@ impl NormalRepresentation {
             }
         }
 
-        return Ok(());
+        Ok(())
     }
 
     fn ensure_data(state: &mut State) {
diff --git a/rust/cubestore/cubezetasketch/src/sketch.rs b/rust/cubestore/cubezetasketch/src/sketch.rs
index 9bfce2cd69eae..e7d8ffcfdf31d 100644
--- a/rust/cubestore/cubezetasketch/src/sketch.rs
+++ b/rust/cubestore/cubezetasketch/src/sketch.rs
@@ -62,9 +62,9 @@ pub enum Representation {
 impl Representation {
     fn from_state(state: &State) -> Result<Representation> {
         if state.has_data() {
-            return Ok(Representation::Normal(NormalRepresentation::new(state)?));
+            Ok(Representation::Normal(NormalRepresentation::new(state)?))
         } else {
-            return Ok(Representation::Sparse(SparseRepresentation::new(state)?));
+            Ok(Representation::Sparse(SparseRepresentation::new(state)?))
         }
     }
 
@@ -107,7 +107,7 @@ impl HyperLogLogPlusPlus {
     ///
     /// `proto` is a valid aggregator state of type `AggregatorType::HYPERLOGLOG_PLUS_UNIQUE`.
     pub fn read(proto: &[u8]) -> Result<HyperLogLogPlusPlus> {
-        return Self::for_coded_input(CodedInputStream::from_bytes(proto));
+        Self::for_coded_input(CodedInputStream::from_bytes(proto))
     }
 
     pub fn write(&self) -> Vec<u8> {
@@ -119,19 +119,19 @@ impl HyperLogLogPlusPlus {
                 return state.to_byte_array();
             }
         }
-        return self.state.to_byte_array();
+        self.state.to_byte_array()
     }
 
     pub fn cardinality(&mut self) -> u64 {
         match &mut self.representation {
-            Representation::Sparse(r) => return r.cardinality(&mut self.state),
-            Representation::Normal(r) => return r.cardinality(&self.state),
+            Representation::Sparse(r) => r.cardinality(&mut self.state),
+            Representation::Normal(r) => r.cardinality(&self.state),
         }
     }
 
     pub fn is_compatible(&self, other: &HyperLogLogPlusPlus) -> bool {
-        return self.state.precision == other.state.precision
-            && self.state.sparse_precision == other.state.sparse_precision;
+        self.state.precision == other.state.precision
+            && self.state.sparse_precision == other.state.sparse_precision
     }
 
     /// Will crash if `self.is_compatible(other)` returns false.
@@ -166,21 +166,21 @@ impl HyperLogLogPlusPlus {
         if let Some(n) = new_repr {
             self.representation = Representation::Normal(n)
         }
-        return Ok(());
+        Ok(())
     }
 
     fn for_coded_input(proto: CodedInputStream) -> Result<HyperLogLogPlusPlus> {
-        return Self::from_state(State::parse_stream(proto)?);
+        Self::from_state(State::parse_stream(proto)?)
     }
 
     fn from_state(state: State) -> Result<HyperLogLogPlusPlus> {
-        if !(state.type_ == AGGREGATOR_TYPE_HYPERLOGLOG_PLUS_UNIQUE) {
+        if state.type_ != AGGREGATOR_TYPE_HYPERLOGLOG_PLUS_UNIQUE {
             return Err(ZetaError::new(format!(
                 "Expected proto to be of type HYPERLOGLOG_PLUS_UNIQUE but was {:?}",
                 state.type_
             )));
         }
-        if !(state.encoding_version == Self::ENCODING_VERSION) {
+        if state.encoding_version != Self::ENCODING_VERSION {
             return Err(ZetaError::new(format!(
                 "Expected encoding version to be {} but was {}",
                 Self::ENCODING_VERSION,
@@ -190,10 +190,10 @@ impl HyperLogLogPlusPlus {
         // TODO: implement or remove.
         // allowedTypes = Type.extractAndNormalize(state);
         let representation = Representation::from_state(&state)?;
-        return Ok(HyperLogLogPlusPlus {
+        Ok(HyperLogLogPlusPlus {
             state,
             representation,
-        });
+        })
     }
 
     /// Allocated size not including size_of::<Self>.  Must be exact.
diff --git a/rust/cubestore/cubezetasketch/src/sparse.rs b/rust/cubestore/cubezetasketch/src/sparse.rs
index a20aa48ee4a52..263a3e6402313 100644
--- a/rust/cubestore/cubezetasketch/src/sparse.rs
+++ b/rust/cubestore/cubezetasketch/src/sparse.rs
@@ -103,7 +103,7 @@ impl SparseRepresentation {
 
         // Compute size limits for the encoded sparse data and temporary buffer relative to what the
         // normal representation would require (which is 2^p bytes).
-        if !(state.precision < 31) {
+        if state.precision >= 31 {
             return Err(ZetaError::new(format!(
                 "expected precision < 31, got {}",
                 state.precision
@@ -126,16 +126,16 @@ impl SparseRepresentation {
         }
         // We have no good way of checking whether the data actually contains the given number of
         // elements without decoding the data, which would be inefficient here.
-        return Ok(SparseRepresentation {
+        Ok(SparseRepresentation {
             max_sparse_data_bytes,
             encoding,
             max_buffer_elements,
             buffer: BTreeSet::new(),
-        });
+        })
     }
 
     pub fn encoding(&self) -> &SparseEncoding {
-        return &self.encoding;
+        &self.encoding
     }
 
     fn check_precision(normal_precision: i32, sparse_precision: i32) -> Result<()> {
@@ -150,7 +150,7 @@ impl SparseRepresentation {
                 sparse_precision
             )));
         }
-        return Ok(());
+        Ok(())
     }
 
     pub fn cardinality(&mut self, state: &mut State) -> u64 {
@@ -163,7 +163,7 @@ impl SparseRepresentation {
         let num_zeros = buckets - state.sparse_size;
         let estimate = buckets as f64 * (buckets as f64 / num_zeros as f64).ln();
 
-        return estimate.round() as u64;
+        estimate.round() as u64
     }
 
     /// `self` may end up be in the invalid state on error and must not be used further.
@@ -175,7 +175,7 @@ impl SparseRepresentation {
     ) -> Result<Option<NormalRepresentation>> {
         // TODO: Add special case when 'this' is empty and 'other' has only encoded data.
         // In that case, we can just copy over the sparse data without needing to decode and dedupe.
-        return self.add_sparse_values(state, other, other_state);
+        self.add_sparse_values(state, other, other_state)
     }
 
     #[must_use]
@@ -187,7 +187,7 @@ impl SparseRepresentation {
     ) -> Result<Option<NormalRepresentation>> {
         let mut normal = self.normalize(state)?;
         normal.merge_with_normal(state, other, other_state);
-        return Ok(Some(normal));
+        Ok(Some(normal))
     }
 
     fn add_sparse_values(
@@ -224,7 +224,7 @@ impl SparseRepresentation {
             )?;
         }
         // TODO: Merge without risking to grow this representation above its maximum size.
-        return Ok(self.update_representation(state)?);
+        self.update_representation(state)
     }
 
     fn merge_and_set<Iter1, Iter2>(
@@ -318,7 +318,7 @@ impl SparseRepresentation {
             }
         }
         let size = s.size;
-        return Self::set_sparse(state, data, size);
+        Self::set_sparse(state, data, size)
     }
 
     fn set_sparse(state: &mut State, data: Vec<u8>, size: i32) -> Result<()> {
@@ -328,10 +328,10 @@ impl SparseRepresentation {
     }
 
     pub(crate) fn sorted_iterator(sparse_data: Option<&[u8]>) -> DifferenceDecoder {
-        return DifferenceDecoder::new(sparse_data.unwrap_or(&[]));
+        DifferenceDecoder::new(sparse_data.unwrap_or(&[]))
     }
 
-    fn buffer_iterator<'a>(&'a self) -> impl Iterator<Item = Result<u32>> + 'a {
+    fn buffer_iterator(&self) -> impl Iterator<Item = Result<u32>> + '_ {
         self.buffer.iter().map(|v| Ok(*v))
     }
 
@@ -364,7 +364,7 @@ impl SparseRepresentation {
             return Ok(Some(self.normalize(state)?));
         }
 
-        return Ok(None);
+        Ok(None)
     }
 
     /// Convert to `NormalRepresentation`.
@@ -384,7 +384,7 @@ impl SparseRepresentation {
             self.buffer.clear();
         }
 
-        return Ok(representation);
+        Ok(representation)
     }
 
     pub fn requires_compaction(&self) -> bool {
@@ -407,7 +407,7 @@ impl SparseRepresentation {
             self.buffer_iterator(),
         )?;
         self.buffer.clear();
-        return Ok(());
+        Ok(())
     }
 
     /// Allocated size (not including size_of::<Self>).  Must be exact.
diff --git a/rust/cubestore/cubezetasketch/src/state.rs b/rust/cubestore/cubezetasketch/src/state.rs
index 8d001a8fc727f..755024142b774 100644
--- a/rust/cubestore/cubezetasketch/src/state.rs
+++ b/rust/cubestore/cubezetasketch/src/state.rs
@@ -61,7 +61,7 @@ pub struct State {
 
 impl Default for State {
     fn default() -> Self {
-        return State {
+        State {
             type_: DEFAULT_TYPE,
             num_values: DEFAULT_NUM_VALUES,
             encoding_version: DEFAULT_ENCODING_VERSION,
@@ -71,7 +71,7 @@ impl Default for State {
             sparse_precision: DEFAULT_SPARSE_PRECISION_OR_NUM_BUCKETS,
             data: None,
             sparse_data: None,
-        };
+        }
     }
 }
 
@@ -134,7 +134,7 @@ const DEFAULT_SPARSE_PRECISION_OR_NUM_BUCKETS: i32 = 0;
 impl State {
     // TODO: remove, change data from Option<> to Vec<>
     pub fn has_data(&self) -> bool {
-        return self.data.is_some() && !self.data.as_ref().unwrap().is_empty();
+        self.data.is_some() && !self.data.as_ref().unwrap().is_empty()
     }
 
     /// Parses a serialized HyperLogLog++ `AggregatorStateProto` and populates this object's
@@ -161,7 +161,7 @@ impl State {
             }
         }
 
-        return Ok(s);
+        Ok(s)
     }
 
     /// Parses a `HyperLogLogPlusUniqueStateProto` message. Since the message is nested within an
@@ -182,7 +182,7 @@ impl State {
                 _ => input.skip_field(wire_type)?,
             }
         }
-        return Ok(());
+        Ok(())
     }
 
     pub fn to_byte_array(&self) -> Vec<u8> {
@@ -191,7 +191,7 @@ impl State {
         let mut output = CodedOutputStream::bytes(result.as_mut_slice());
         self.write_to(hll_size, &mut output);
         output.check_eof();
-        return result;
+        result
     }
 
     fn write_to(&self, hll_size: u32, stream: &mut CodedOutputStream) {
@@ -279,7 +279,7 @@ impl State {
         size += hll_size.len_varint();
         size += hll_size;
 
-        return (size, hll_size);
+        (size, hll_size)
     }
 
     fn get_serialized_hll_size(&self) -> u32 {
@@ -312,7 +312,7 @@ impl State {
             size += sparse_data.len() as u32;
         }
 
-        return size;
+        size
     }
 
     /// Allocated size not including size_of::<Self>().  Must be exact (or worst-case).
@@ -323,10 +323,10 @@ impl State {
 
         let mut sum = 0;
         if let Some(d) = &self.data {
-            sum += vec_alloc_size(&d);
+            sum += vec_alloc_size(d);
         }
         if let Some(sd) = &self.sparse_data {
-            sum += vec_alloc_size(&sd);
+            sum += vec_alloc_size(sd);
         }
         sum
     }