Skip to content

Commit ed6a0c7

Browse files
authored
chore(query): improve distributed runtime filter (#16862)
* chore(query): improve distributed runtime filter
* chore(code): refine code
* chore(code): refine code
* chore(code); fix make lint
* chore(query): add scan id to fix same table index
* chore(code): make lint
* chore(pipeline): fix deserializer runtime filter key
1 parent 1d55d57 commit ed6a0c7

File tree

24 files changed

+265
-2
lines changed

24 files changed

+265
-2
lines changed

src/query/catalog/src/plan/datasource/datasource_plan.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub struct DataSourcePlan {
4747
pub data_mask_policy: Option<BTreeMap<FieldIndex, RemoteExpr>>,
4848

4949
pub table_index: usize,
50+
pub scan_id: usize,
5051
}
5152

5253
impl DataSourcePlan {

src/query/catalog/src/runtime_filter_info.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use databend_common_base::base::tokio::sync::watch;
16+
use databend_common_base::base::tokio::sync::watch::Receiver;
17+
use databend_common_base::base::tokio::sync::watch::Sender;
1518
use databend_common_expression::Expr;
1619
use xorf::BinaryFuse16;
1720

@@ -62,4 +65,24 @@ impl RuntimeFilterInfo {
6265
pub fn is_empty(&self) -> bool {
6366
self.inlist.is_empty() && self.bloom.is_empty() && self.min_max.is_empty()
6467
}
68+
69+
pub fn is_blooms_empty(&self) -> bool {
70+
self.bloom.is_empty()
71+
}
72+
}
73+
74+
pub struct RuntimeFilterReady {
75+
pub runtime_filter_watcher: Sender<Option<bool>>,
76+
/// A dummy receiver that keeps the `runtime_filter_watcher` channel open.
77+
pub _runtime_filter_dummy_receiver: Receiver<Option<bool>>,
78+
}
79+
80+
impl Default for RuntimeFilterReady {
81+
fn default() -> Self {
82+
let (watcher, dummy_receiver) = watch::channel(None);
83+
Self {
84+
runtime_filter_watcher: watcher,
85+
_runtime_filter_dummy_receiver: dummy_receiver,
86+
}
87+
}
6588
}

src/query/catalog/src/table_context.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ use crate::plan::PartInfoPtr;
7474
use crate::plan::Partitions;
7575
use crate::query_kind::QueryKind;
7676
use crate::runtime_filter_info::RuntimeFilterInfo;
77+
use crate::runtime_filter_info::RuntimeFilterReady;
7778
use crate::statistics::data_cache_statistics::DataCacheMetrics;
7879
use crate::table::Table;
7980

@@ -317,6 +318,14 @@ pub trait TableContext: Send + Sync {
317318

318319
fn set_runtime_filter(&self, filters: (usize, RuntimeFilterInfo));
319320

321+
fn set_runtime_filter_ready(&self, table_index: usize, ready: Arc<RuntimeFilterReady>);
322+
323+
fn get_runtime_filter_ready(&self, table_index: usize) -> Vec<Arc<RuntimeFilterReady>>;
324+
325+
fn set_wait_runtime_filter(&self, table_index: usize, need_to_wait: bool);
326+
327+
fn get_wait_runtime_filter(&self, table_index: usize) -> bool;
328+
320329
fn clear_runtime_filter(&self);
321330

322331
fn set_merge_into_join(&self, join: MergeIntoJoin);

src/query/service/src/interpreters/interpreter_copy_into_table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ impl CopyIntoTableInterpreter {
137137
(
138138
CopyIntoTableSource::Stage(Box::new(PhysicalPlan::TableScan(TableScan {
139139
plan_id: 0,
140+
scan_id: 0,
140141
name_mapping,
141142
stat_info: None,
142143
table_index: None,

src/query/service/src/pipelines/builders/builder_aggregate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ impl PipelineBuilder {
9797
}
9898

9999
pub(crate) fn build_aggregate_partial(&mut self, aggregate: &AggregatePartial) -> Result<()> {
100+
self.contain_sink_processor = true;
100101
self.build_pipeline(&aggregate.input)?;
101102

102103
let max_block_size = self.settings.get_max_block_size()?;

src/query/service/src/pipelines/builders/builder_join.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ impl PipelineBuilder {
163163
join_state.clone(),
164164
output_len,
165165
)?;
166+
build_state.add_runtime_filter_ready();
166167

167168
let create_sink_processor = |input| {
168169
Ok(ProcessorPtr::create(TransformHashJoinBuild::try_create(

src/query/service/src/pipelines/builders/builder_recluster.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ impl PipelineBuilder {
6868
update_stream_columns: table.change_tracking_enabled(),
6969
data_mask_policy: None,
7070
table_index: usize::MAX,
71+
scan_id: usize::MAX,
7172
};
7273

7374
{

src/query/service/src/pipelines/builders/builder_scan.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ impl PipelineBuilder {
4141
pub(crate) fn build_table_scan(&mut self, scan: &TableScan) -> Result<()> {
4242
let table = self.ctx.build_table_from_source_plan(&scan.source)?;
4343
self.ctx.set_partitions(scan.source.parts.clone())?;
44+
self.ctx
45+
.set_wait_runtime_filter(scan.scan_id, self.contain_sink_processor);
4446
table.read_data(
4547
self.ctx.clone(),
4648
&scan.source,

src/query/service/src/pipelines/pipeline_builder.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ pub struct PipelineBuilder {
6060

6161
pub r_cte_scan_interpreters: Vec<CreateTableInterpreter>,
6262
pub(crate) is_exchange_neighbor: bool,
63+
64+
pub contain_sink_processor: bool,
6365
}
6466

6567
impl PipelineBuilder {
@@ -83,6 +85,7 @@ impl PipelineBuilder {
8385
hash_join_states: HashMap::new(),
8486
r_cte_scan_interpreters: vec![],
8587
is_exchange_neighbor: false,
88+
contain_sink_processor: false,
8689
}
8790
}
8891

src/query/service/src/pipelines/processors/transforms/hash_join/build_state.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::sync::Arc;
16+
17+
use databend_common_catalog::runtime_filter_info::RuntimeFilterReady;
1518
use databend_common_expression::types::DataType;
1619
use databend_common_expression::ColumnVec;
1720
use databend_common_expression::DataBlock;
@@ -23,6 +26,7 @@ pub struct BuildState {
2326
pub(crate) outer_scan_map: Vec<Vec<bool>>,
2427
/// LeftMarkScan map, initialized at `HashJoinBuildState`, used in `HashJoinProbeState`
2528
pub(crate) mark_scan_map: Vec<Vec<u8>>,
29+
pub(crate) runtime_filter_ready: Vec<Arc<RuntimeFilterReady>>,
2630
}
2731

2832
impl BuildState {
@@ -31,6 +35,7 @@ impl BuildState {
3135
generation_state: BuildBlockGenerationState::new(),
3236
outer_scan_map: Vec::new(),
3337
mark_scan_map: Vec::new(),
38+
runtime_filter_ready: Vec::new(),
3439
}
3540
}
3641
}

0 commit comments

Comments
 (0)