Merge pull request ClickHouse#89421 from ClickHouse/backport/25.8/89177

divanik · web-flow · commit 301e81f49cfe · 2025-11-06T14:45:20.000+01:00
Backport ClickHouse#89177 to 25.8: Calculate all subquery sets inplace before Iceberg partition pruning
diff --git a/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp b/src/Processors/QueryPlan/ReadFromObjectStorageStep.cpp
@@ -15,6 +15,7 @@
 #include <Formats/FormatFactory.h>
 #include <IO/ReadBufferFromString.h>
 #include <Interpreters/Context.h>
+#include <Storages/VirtualColumnUtils.h>
 
 
 namespace DB
@@ -61,6 +62,14 @@ QueryPlanStepPtr ReadFromObjectStorageStep::clone() const
 void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes)
 {
     SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
+    // It is important to build the inplace sets for the filter here, before reading data from object storage.
+    // If we delay building these sets until later in the pipeline, the filter can be applied after the data
+    // has already been read, potentially in parallel across many streams. This can significantly reduce the
+    // effectiveness of Iceberg partition pruning, as unnecessary data may be read. Additionally, building ordered sets
+    // at this stage enables the KeyCondition class to apply more efficient optimizations than for unordered sets.
+    if (!filter_actions_dag)
+        return;
+    VirtualColumnUtils::buildOrderedSetsForDAG(*filter_actions_dag, getContext());
 }
 
 void ReadFromObjectStorageStep::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value)
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
@@ -64,7 +64,7 @@ namespace DB
 namespace VirtualColumnUtils
 {
 
-void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)
+void buildSetsForDagImpl(const ActionsDAG & dag, const ContextPtr & context, bool ordered)
 {
     for (const auto & node : dag.getNodes())
     {
@@ -80,13 +80,28 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)
                 if (!future_set->get())
                 {
                     if (auto * set_from_subquery = typeid_cast<FutureSetFromSubquery *>(future_set.get()))
-                        set_from_subquery->buildSetInplace(context);
+                    {
+                        if (ordered)
+                            set_from_subquery->buildOrderedSetInplace(context);
+                        else
+                            set_from_subquery->buildSetInplace(context);
+                    }
                 }
             }
         }
     }
 }
 
+void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)
+{
+    buildSetsForDagImpl(dag, context, /* ordered = */ false);
+}
+
+void buildOrderedSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)
+{
+    buildSetsForDagImpl(dag, context, /* ordered = */ true);
+}
+
 ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context)
 {
     buildSetsForDAG(dag, context);
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
@@ -40,6 +40,9 @@ void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & blo
 /// Builds sets used by ActionsDAG inplace.
 void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
 
+/// Builds ordered sets used by ActionsDAG inplace.
+void buildOrderedSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
+
 /// Checks if all functions used in DAG are deterministic.
 bool isDeterministic(const ActionsDAG::Node * node);
 
diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
@@ -3199,3 +3199,73 @@ def execute_spark_query(query: str):
         except:
             print("Dictionary: {}, Allowed Content Types: {}".format(diction, allowed_content_types))
             raise
+
+
+@pytest.mark.parametrize(
+    "storage_type",
+    ["s3", "azure", "local"],
+)
+def test_partition_pruning_with_subquery_set(started_cluster, storage_type):
+    instance = started_cluster.instances["node1"]
+    spark = started_cluster.spark_session
+    TABLE_NAME = "test_partition_pruning_" + storage_type + "_" + get_uuid_str()
+    IN_MEMORY_TABLE = "in_memory_table_" + get_uuid_str()
+
+    def execute_spark_query(query: str):
+        return execute_spark_query_general(
+            spark,
+            started_cluster,
+            storage_type,
+            TABLE_NAME,
+            query,
+        )
+
+    execute_spark_query(
+        f"""
+            CREATE TABLE {TABLE_NAME} (
+                id INT,
+                data STRING
+            )
+            USING iceberg
+            PARTITIONED BY (identity(id))
+            OPTIONS('format-version'='2')
+        """
+    )
+
+    execute_spark_query(
+        f"""
+        INSERT INTO {TABLE_NAME} VALUES
+        (1, 'a'),
+        (2, 'b'),
+        (3, 'c'),
+        (4, 'd'),
+        (5, 'e');
+    """
+    )
+
+
+    creation_expression = get_creation_expression(
+        storage_type, TABLE_NAME, started_cluster, table_function=True
+    )
+
+    instance.query(f"CREATE TABLE {IN_MEMORY_TABLE} (id INT) ENGINE = Memory")
+    instance.query(f"INSERT INTO {IN_MEMORY_TABLE} VALUES (2), (4)")
+
+
+    def check_validity_and_get_prunned_files(select_expression):
+        settings1 = {
+            "use_iceberg_partition_pruning": 0
+        }
+        settings2 = {
+            "use_iceberg_partition_pruning": 1
+        }
+        return check_validity_and_get_prunned_files_general(
+            instance, TABLE_NAME, settings1, settings2, 'IcebergPartitionPrunedFiles', select_expression
+        )
+
+    assert (
+        check_validity_and_get_prunned_files(
+            f"SELECT * FROM {creation_expression} WHERE id in (SELECT id FROM {IN_MEMORY_TABLE}) ORDER BY ALL"
+        )
+        == 3
+    )
diff --git a/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference b/tests/queries/0_stateless/03275_auto_cluster_functions_with_parallel_replicas.reference
@@ -6,9 +6,6 @@ CreatingSets (Create sets before main query execution)
   Expression ((Project names + Projection))
     Filter ((WHERE + Change column names to column identifiers))
       ReadFromObjectStorage
-  CreatingSet (Create set for subquery)
-    Expression ((Project names + (Projection + Change column names to column identifiers)))
-      ReadFromObjectStorage
 Expression ((Project names + Projection))
   Aggregating
     Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))

Original file line number	Diff line number	Diff line change
`@@ -64,7 +64,7 @@ namespace DB`
`64`	`64`	`namespace VirtualColumnUtils`
`65`	`65`	`{`
`66`	`66`
`67`		`-void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)`
	`67`	`+void buildSetsForDagImpl(const ActionsDAG & dag, const ContextPtr & context, bool ordered)`
`68`	`68`	`{`
`69`	`69`	`for (const auto & node : dag.getNodes())`
`70`	`70`	`{`
`@@ -80,13 +80,28 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)`
`80`	`80`	`if (!future_set->get())`
`81`	`81`	`{`
`82`	`82`	`if (auto * set_from_subquery = typeid_cast<FutureSetFromSubquery *>(future_set.get()))`
`83`		`- set_from_subquery->buildSetInplace(context);`
	`83`	`+ {`
	`84`	`+ if (ordered)`
	`85`	`+ set_from_subquery->buildOrderedSetInplace(context);`
	`86`	`+ else`
	`87`	`+ set_from_subquery->buildSetInplace(context);`
	`88`	`+ }`
`84`	`89`	`}`
`85`	`90`	`}`
`86`	`91`	`}`
`87`	`92`	`}`
`88`	`93`	`}`
`89`	`94`
	`95`	`+void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)`
	`96`	`+{`
	`97`	`+ buildSetsForDagImpl(dag, context, /* ordered = */ false);`
	`98`	`+}`
	`99`	`+`
	`100`	`+void buildOrderedSetsForDAG(const ActionsDAG & dag, const ContextPtr & context)`
	`101`	`+{`
	`102`	`+ buildSetsForDagImpl(dag, context, /* ordered = */ true);`
	`103`	`+}`
	`104`	`+`
`90`	`105`	`ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context)`
`91`	`106`	`{`
`92`	`107`	`buildSetsForDAG(dag, context);`