Merged
Changes from 29 commits
Commits
52 commits
a6e01e7
Split channel resolver in two
gabotechs Dec 22, 2025
fc9bfc8
Simplify WorkerResolverExtension and ChannelResolverExtension
gabotechs Dec 22, 2025
9e15f2b
Add default builder to ArrowFlightEndpoint
gabotechs Dec 22, 2025
34cf529
Add some docs
gabotechs Dec 22, 2025
312901d
Listen to clippy
gabotechs Dec 23, 2025
f026e41
Split get_flight_client_for_url in two
gabotechs Dec 23, 2025
2508e48
Fix conflicts
gabotechs Dec 24, 2025
f7218b0
Remove unnecessary channel resolver
gabotechs Dec 25, 2025
b49289a
Improve WorkerResolver docs
gabotechs Dec 26, 2025
793f898
Use one ChannelResolver per runtime
gabotechs Dec 26, 2025
eaad60f
Improve error reporting on client connection failure
gabotechs Dec 26, 2025
ea4e09a
Add a from_session_builder method for constructing an InMemoryChannel…
gabotechs Dec 26, 2025
33b0cc7
Add ChannelResolver and WorkerResolver default implementations for Arcs
gabotechs Dec 26, 2025
1aeb719
Make TPC-DS tests use DataFusion test dataset
gabotechs Dec 24, 2025
e377698
Remove non-working in-memory option from benchmarks
gabotechs Dec 24, 2025
7a0b296
Remove unnecessary utils folder
gabotechs Dec 24, 2025
41f90a1
Refactor benchmark folder
gabotechs Dec 24, 2025
c88058e
Rename to prepare_tpch.rs
gabotechs Dec 24, 2025
b3bdd2b
Adapt benchmarks for TPC-DS
gabotechs Dec 24, 2025
05a30cc
Update benchmarks README.md
gabotechs Dec 24, 2025
0c736fd
Fix conflicts
gabotechs Dec 24, 2025
f9f4439
Use default session state builder
gabotechs Dec 26, 2025
21e8581
Update benchmarks README.md
gabotechs Dec 27, 2025
c306c6d
add broadcast join
gene-bordegaray Dec 27, 2025
12512af
don't distribute 1 consumer tasks
gene-bordegaray Dec 28, 2025
8927012
fix analyze tests
gene-bordegaray Dec 28, 2025
d13a28c
don't distribute a single consumer, use coalesce
gene-bordegaray Dec 28, 2025
e0e5f50
introduce broadcast operator that does caching
gene-bordegaray Jan 3, 2026
ec607b5
Merge branch 'main' into gene.bordegaray/2025/12/add_broadcast_exec
gene-bordegaray Jan 3, 2026
47d4ab9
refactored distributed planner to contain less broadcast logic and ad…
gene-bordegaray Jan 3, 2026
eae78c5
fix docs
gene-bordegaray Jan 3, 2026
7152752
add comment for follow up streaming work
gene-bordegaray Jan 3, 2026
83bfed2
add comment explaining cache solution for 1->1 task stage collapses
gene-bordegaray Jan 4, 2026
5d02692
refactor network broadcast to be cleaner
gene-bordegaray Jan 6, 2026
9669518
add new pass to the annotation
gene-bordegaray Jan 9, 2026
4a3af9a
put broadcast joins behind feature flag
gene-bordegaray Jan 12, 2026
9bcd287
Merge branch 'main' into gene.bordegaray/2025/12/add_broadcast_exec
gene-bordegaray Jan 12, 2026
b3cd8e0
only distribute joins when broadcast enabled
gene-bordegaray Jan 13, 2026
92b81a6
add benchmark config and update docs
gene-bordegaray Jan 13, 2026
c1610a3
everything but tpcds query 75 works
gene-bordegaray Jan 14, 2026
2cebb41
propagate unions correctly and don't allow left joins
gene-bordegaray Jan 17, 2026
1bd0922
refactor tests
gene-bordegaray Jan 17, 2026
b8064e2
Merge branch 'main' into gene.bordegaray/2025/12/add_broadcast_exec
gene-bordegaray Jan 17, 2026
c50f9b5
add comments
gene-bordegaray Jan 17, 2026
a4392a4
fix broadcast with new children
gene-bordegaray Jan 17, 2026
f1922d7
fix benches
gene-bordegaray Jan 17, 2026
728a031
add insert broadcast tests
gene-bordegaray Jan 17, 2026
5f884ac
refactor / address comments
gene-bordegaray Jan 18, 2026
c9d2a11
remove no-op check
gene-bordegaray Jan 18, 2026
c271cd5
remove check to allow build side to cap join
gene-bordegaray Jan 23, 2026
0809dee
Merge branch 'main' into gene.bordegaray/2025/12/add_broadcast_exec
gene-bordegaray Jan 23, 2026
3c0d1c1
fix indenting
gene-bordegaray Jan 23, 2026
189 changes: 131 additions & 58 deletions src/distributed_planner/distributed_physical_optimizer_rule.rs
@@ -3,9 +3,9 @@ use crate::distributed_planner::plan_annotator::{
AnnotatedPlan, RequiredNetworkBoundary, annotate_plan,
};
use crate::{
DistributedConfig, DistributedExec, NetworkCoalesceExec, NetworkShuffleExec, TaskEstimator,
BroadcastExec, DistributedConfig, DistributedExec, NetworkBroadcastExec, NetworkCoalesceExec,
NetworkShuffleExec, TaskCountAnnotation, TaskEstimator,
};
use datafusion::common::internal_err;
use datafusion::common::tree_node::{Transformed, TreeNode};
use datafusion::config::ConfigOptions;
use datafusion::error::DataFusionError;
@@ -88,21 +88,58 @@ fn distribute_plan(
stage_id: &mut usize,
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
let d_cfg = DistributedConfig::from_config_options(cfg)?;
let mut children = annotated_plan.children;
let parent_task_count = annotated_plan.task_count.as_usize();

let children = annotated_plan.children;
// This is a leaf node, so we need to scale it up with the final task count.
if children.is_empty() {
let scaled_up = d_cfg.__private_task_estimator.scale_up_leaf_node(
&annotated_plan.plan,
annotated_plan.task_count.as_usize(),
parent_task_count,
cfg,
);
return Ok(scaled_up.unwrap_or(annotated_plan.plan));
}

let parent_task_count = annotated_plan.task_count.as_usize();
let max_child_task_count = children.iter().map(|v| v.task_count.as_usize()).max();
// Broadcast requires different task counts for build vs probe.
if annotated_plan.required_network_boundary == Some(RequiredNetworkBoundary::Broadcast) {
let mut build = children.remove(0);
let mut probe = children.remove(0);

set_task_count_until_boundary(&mut probe, parent_task_count);
gabotechs (Collaborator) Jan 3, 2026

Any chance of handling this as a normal if nb_req == RequiredNetworkBoundary::* { ... } statement at the end of this function, like the other network boundaries?

Also, one improvement that comes to mind for consistency with the other network boundaries is to wrap all the NetworkBroadcastExec instantiation logic inside NetworkBroadcastExec::try_new instead of here.

For example, note how in NetworkShuffleExec::try_new we handle the logic of transforming the child nodes in order to adapt them to shuffles, rather than doing it in this function:

https://github.com/datafusion-contrib/datafusion-distributed/blob/main/src/execution_plans/network_shuffle.rs#L151-L151

Do you think we could reach a point where instantiating one NetworkBroadcastExec is no different than instantiating any of the other network boundaries?

Collaborator (Author)

Refactored this as best I could; let me know if you have more ideas. Some logic still lives here because, unlike the other operators, Broadcast relies on the annotated plan.

Collaborator (Author)

I could extract the logic into a helper, but that seems like unneeded indirection since the logic only happens once.
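
For illustration of the helper idea discussed above, here is a rough sketch of what extracting the build-side rewriting could look like. This is not code from this PR: the helper name is hypothetical, and the query_id type (Uuid) is an assumption; everything else reuses the calls shown in the hunk above.

// Hypothetical helper sketch, not part of this PR. It mirrors the broadcast
// branch above so that the planner only instantiates the boundary, similar to
// how NetworkShuffleExec::try_new adapts its child to shuffles.
fn build_broadcast_side(
    build: AnnotatedPlan,
    consumer_task_count: usize,
    cfg: &ConfigOptions,
    query_id: Uuid, // assumed type for the planner's query id
    stage_id: &mut usize,
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
    // Drop CoalescePartitionsExec so multiple partitions flow into BroadcastExec;
    // coalescing happens again on the consumer side.
    let build = unwrap_coalesce_partitions(build);
    let build_task_count = build.task_count.as_usize();
    let build_side = distribute_plan(build, cfg, query_id, stage_id)?;
    // Replicate every build-side partition to each consumer task.
    let broadcast = Arc::new(BroadcastExec::new(build_side, consumer_task_count));
    let boundary = Arc::new(NetworkBroadcastExec::try_new(
        broadcast,
        query_id,
        *stage_id,
        build_task_count,
    )?);
    stage_id.add_assign(1);
    // Consumer tasks still expect a single build partition, so coalesce above the boundary.
    Ok(Arc::new(CoalescePartitionsExec::new(boundary)))
}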


// If there's only one consumer task, use Coalesce instead of Broadcast.
let build_child: Arc<dyn ExecutionPlan> = if parent_task_count == 1 {
set_task_count_until_boundary(&mut build, 1);
let build_side = distribute_plan(build, cfg, query_id, stage_id)?;
Arc::new(NetworkCoalesceExec::try_new(
build_side, query_id, *stage_id, 1, 1,
)?)
} else {
// Remove CoalescePartitionsExec since we want multiple partitions flowing through
// BroadcastExec. Coalescing happens on the consumer side.
let build_without_coalesce = unwrap_coalesce_partitions(build);
let build_task_count = build_without_coalesce.task_count.as_usize();
let build_side = distribute_plan(build_without_coalesce, cfg, query_id, stage_id)?;
let broadcast_exec = Arc::new(BroadcastExec::new(build_side, parent_task_count));

let network_broadcast = Arc::new(NetworkBroadcastExec::try_new(
broadcast_exec,
query_id,
*stage_id,
build_task_count,
)?);
// Add CoalescePartitionsExec above the network boundary on consumer side.
Arc::new(CoalescePartitionsExec::new(network_broadcast))
};
stage_id.add_assign(1);

let probe_side = distribute_plan(probe, cfg, query_id, stage_id)?;
return annotated_plan
.plan
.with_new_children(vec![build_child, probe_side]);
}

let max_child_task_count = children.iter().map(|v| v.task_count.as_usize()).max();
let new_children = children
.into_iter()
.map(|child| distribute_plan(child, cfg, query_id, stage_id))
@@ -119,39 +156,60 @@
return annotated_plan.plan.with_new_children(new_children);
}

// If the current node has a RepartitionExec below, it needs a shuffle, so put one
// NetworkShuffleExec boundary in between the RepartitionExec and the current node.
if nb_req == RequiredNetworkBoundary::Shuffle {
let new_child = Arc::new(NetworkShuffleExec::try_new(
require_one_child(new_children)?,
query_id,
*stage_id,
parent_task_count,
max_child_task_count.unwrap_or(1),
)?);
stage_id.add_assign(1);
return annotated_plan.plan.with_new_children(vec![new_child]);
match nb_req {
// If the current node has a RepartitionExec below, it needs a shuffle, so put one
// NetworkShuffleExec boundary in between the RepartitionExec and the current node.
RequiredNetworkBoundary::Shuffle => {
let new_child = Arc::new(NetworkShuffleExec::try_new(
require_one_child(new_children)?,
query_id,
*stage_id,
parent_task_count,
max_child_task_count.unwrap_or(1),
)?);
stage_id.add_assign(1);
annotated_plan.plan.with_new_children(vec![new_child])
}
// If this is a CoalescePartitionsExec or a SortMergePreservingExec, it means that the original
// plan is trying to merge all partitions into one. We need to go one step ahead and also merge
// all distributed tasks into one.
RequiredNetworkBoundary::Coalesce => {
let new_child = Arc::new(NetworkCoalesceExec::try_new(
require_one_child(new_children)?,
query_id,
*stage_id,
parent_task_count,
max_child_task_count.unwrap_or(1),
)?);
stage_id.add_assign(1);
annotated_plan.plan.with_new_children(vec![new_child])
}
RequiredNetworkBoundary::Broadcast => unreachable!("handled above"),
}
}

// If this is a CoalescePartitionsExec or a SortMergePreservingExec, it means that the original
// plan is trying to merge all partitions into one. We need to go one step ahead and also merge
// all distributed tasks into one.
if nb_req == RequiredNetworkBoundary::Coalesce {
let new_child = Arc::new(NetworkCoalesceExec::try_new(
require_one_child(new_children)?,
query_id,
*stage_id,
parent_task_count,
max_child_task_count.unwrap_or(1),
)?);
stage_id.add_assign(1);
return annotated_plan.plan.with_new_children(vec![new_child]);
fn set_task_count_until_boundary(plan: &mut AnnotatedPlan, task_count: usize) {
plan.task_count = TaskCountAnnotation::Desired(task_count);
if plan.required_network_boundary.is_none() {
for child in &mut plan.children {
set_task_count_until_boundary(child, task_count);
}
}
}

internal_err!(
"Unreachable code reached in distribute_plan. Could not determine how to place a network boundary below {}",
annotated_plan.plan.name()
)
/// Unwraps [CoalescePartitionsExec] if present and returns its child.
fn unwrap_coalesce_partitions(mut plan: AnnotatedPlan) -> AnnotatedPlan {
if plan
.plan
.as_any()
.downcast_ref::<CoalescePartitionsExec>()
.is_some()
&& !plan.children.is_empty()
{
plan.children.remove(0)
} else {
plan
}
}

/// Rearranges the [CoalesceBatchesExec] nodes in the plan so that they are placed right below
@@ -426,11 +484,23 @@ mod tests {
})
.await;
assert_snapshot!(plan, @r"
CoalesceBatchesExec: target_batch_size=8192
HashJoinExec: mode=CollectLeft, join_type=Left, on=[(RainToday@1, RainToday@1)], projection=[MinTemp@0, MaxTemp@2]
CoalescePartitionsExec
DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MinTemp, RainToday], file_type=parquet
DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MaxTemp, RainToday], file_type=parquet
┌───── DistributedExec ── Tasks: t0:[p0]
│ CoalescePartitionsExec
│ [Stage 2] => NetworkCoalesceExec: output_partitions=3, input_tasks=3
└──────────────────────────────────────────────────
┌───── Stage 2 ── Tasks: t0:[p0] t1:[p1] t2:[p2]
│ CoalesceBatchesExec: target_batch_size=8192
│ HashJoinExec: mode=CollectLeft, join_type=Left, on=[(RainToday@1, RainToday@1)], projection=[MinTemp@0, MaxTemp@2]
│ CoalescePartitionsExec
│ [Stage 1] => NetworkBroadcastExec: partitions_per_consumer=1, stage_partitions=3, input_tasks=3
Collaborator

This is very cool. I think we will be able to display clearer information here. Let us chat.

│ PartitionIsolatorExec: t0:[p0,__,__] t1:[__,p0,__] t2:[__,__,p0]
│ DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MaxTemp, RainToday], file_type=parquet
└──────────────────────────────────────────────────
┌───── Stage 1 ── Tasks: t0:[p0..p2] t1:[p3..p5] t2:[p6..p8]
│ BroadcastExec: input_partitions=1, consumer_tasks=3, output_partitions=3
│ PartitionIsolatorExec: t0:[p0,__,__] t1:[__,p0,__] t2:[__,__,p0]
│ DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MinTemp, RainToday], file_type=parquet
└──────────────────────────────────────────────────
");
}

@@ -468,28 +538,31 @@ mod tests {
│ CoalescePartitionsExec
│ CoalesceBatchesExec: target_batch_size=8192
│ HashJoinExec: mode=CollectLeft, join_type=Left, on=[(RainTomorrow@1, RainTomorrow@1)], projection=[MinTemp@0, MaxTemp@2]
│ CoalescePartitionsExec
│ [Stage 2] => NetworkCoalesceExec: output_partitions=8, input_tasks=2
│ [Stage 3] => NetworkCoalesceExec: output_partitions=1, input_tasks=1
Collaborator

This still looks wrong. The DistributedExec stage and Stage 3 should be collapsing into 1 without any repercussion in correctness. We should be able to accomplish that in this PR.

Collaborator

Agree. We do not want to change the existing tests; new tests should be added for new behavior.
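
For illustration, one way the collapse being asked for could look (a hypothetical variant, not what this PR implements): in distribute_plan's broadcast handling, when there is a single consumer task, skip the network boundary entirely so the build side runs inside the consumer's stage and no separate 1 -> 1 stage is emitted.

// Hypothetical variant of the single-consumer case, not part of this PR.
// With one consumer task there is nothing to broadcast, so the join's children
// can be distributed in place and the 1 -> 1 NetworkCoalesceExec disappears.
if parent_task_count == 1 {
    set_task_count_until_boundary(&mut build, 1);
    set_task_count_until_boundary(&mut probe, 1);
    let build_side = distribute_plan(build, cfg, query_id, stage_id)?;
    let probe_side = distribute_plan(probe, cfg, query_id, stage_id)?;
    return annotated_plan
        .plan
        .with_new_children(vec![build_side, probe_side]);
}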

│ ProjectionExec: expr=[avg(weather.MaxTemp)@1 as MaxTemp, RainTomorrow@0 as RainTomorrow]
│ AggregateExec: mode=FinalPartitioned, gby=[RainTomorrow@0 as RainTomorrow], aggr=[avg(weather.MaxTemp)]
│ [Stage 3] => NetworkShuffleExec: output_partitions=4, input_tasks=3
│ [Stage 4] => NetworkShuffleExec: output_partitions=4, input_tasks=3
└──────────────────────────────────────────────────
┌───── Stage 2 ── Tasks: t0:[p0..p3] t1:[p0..p3]
│ ProjectionExec: expr=[avg(weather.MinTemp)@1 as MinTemp, RainTomorrow@0 as RainTomorrow]
│ AggregateExec: mode=FinalPartitioned, gby=[RainTomorrow@0 as RainTomorrow], aggr=[avg(weather.MinTemp)]
│ [Stage 1] => NetworkShuffleExec: output_partitions=4, input_tasks=3
┌───── Stage 3 ── Tasks: t0:[p0]
│ CoalescePartitionsExec
│ [Stage 2] => NetworkCoalesceExec: output_partitions=8, input_tasks=2
└──────────────────────────────────────────────────
┌───── Stage 1 ── Tasks: t0:[p0..p7] t1:[p0..p7] t2:[p0..p7]
│ CoalesceBatchesExec: target_batch_size=8192
│ RepartitionExec: partitioning=Hash([RainTomorrow@0], 8), input_partitions=4
│ AggregateExec: mode=Partial, gby=[RainTomorrow@1 as RainTomorrow], aggr=[avg(weather.MinTemp)]
│ CoalesceBatchesExec: target_batch_size=8192
│ FilterExec: RainToday@1 = yes, projection=[MinTemp@0, RainTomorrow@2]
│ RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
│ PartitionIsolatorExec: t0:[p0,__,__] t1:[__,p0,__] t2:[__,__,p0]
│ DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MinTemp, RainToday, RainTomorrow], file_type=parquet, predicate=RainToday@1 = yes, pruning_predicate=RainToday_null_count@2 != row_count@3 AND RainToday_min@0 <= yes AND yes <= RainToday_max@1, required_guarantees=[RainToday in (yes)]
┌───── Stage 2 ── Tasks: t0:[p0..p3] t1:[p0..p3]
│ ProjectionExec: expr=[avg(weather.MinTemp)@1 as MinTemp, RainTomorrow@0 as RainTomorrow]
│ AggregateExec: mode=FinalPartitioned, gby=[RainTomorrow@0 as RainTomorrow], aggr=[avg(weather.MinTemp)]
│ [Stage 1] => NetworkShuffleExec: output_partitions=4, input_tasks=3
└──────────────────────────────────────────────────
┌───── Stage 3 ── Tasks: t0:[p0..p3] t1:[p0..p3] t2:[p0..p3]
┌───── Stage 1 ── Tasks: t0:[p0..p7] t1:[p0..p7] t2:[p0..p7]
│ CoalesceBatchesExec: target_batch_size=8192
│ RepartitionExec: partitioning=Hash([RainTomorrow@0], 8), input_partitions=4
│ AggregateExec: mode=Partial, gby=[RainTomorrow@1 as RainTomorrow], aggr=[avg(weather.MinTemp)]
│ CoalesceBatchesExec: target_batch_size=8192
│ FilterExec: RainToday@1 = yes, projection=[MinTemp@0, RainTomorrow@2]
│ RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
│ PartitionIsolatorExec: t0:[p0,__,__] t1:[__,p0,__] t2:[__,__,p0]
│ DataSourceExec: file_groups={3 groups: [[/testdata/weather/result-000000.parquet], [/testdata/weather/result-000001.parquet], [/testdata/weather/result-000002.parquet]]}, projection=[MinTemp, RainToday, RainTomorrow], file_type=parquet, predicate=RainToday@1 = yes, pruning_predicate=RainToday_null_count@2 != row_count@3 AND RainToday_min@0 <= yes AND yes <= RainToday_max@1, required_guarantees=[RainToday in (yes)]
└──────────────────────────────────────────────────
┌───── Stage 4 ── Tasks: t0:[p0..p3] t1:[p0..p3] t2:[p0..p3]
│ CoalesceBatchesExec: target_batch_size=8192
│ RepartitionExec: partitioning=Hash([RainTomorrow@0], 4), input_partitions=4
│ AggregateExec: mode=Partial, gby=[RainTomorrow@1 as RainTomorrow], aggr=[avg(weather.MaxTemp)]
4 changes: 3 additions & 1 deletion src/distributed_planner/network_boundary.rs
@@ -1,4 +1,4 @@
use crate::{NetworkCoalesceExec, NetworkShuffleExec, Stage};
use crate::{NetworkBroadcastExec, NetworkCoalesceExec, NetworkShuffleExec, Stage};
use datafusion::physical_plan::ExecutionPlan;
use std::sync::Arc;

@@ -35,6 +35,8 @@ impl NetworkBoundaryExt for dyn ExecutionPlan {
Some(node)
} else if let Some(node) = self.as_any().downcast_ref::<NetworkCoalesceExec>() {
Some(node)
} else if let Some(node) = self.as_any().downcast_ref::<NetworkBroadcastExec>() {
Some(node)
} else {
None
}
38 changes: 19 additions & 19 deletions src/distributed_planner/plan_annotator.rs
@@ -19,6 +19,7 @@ use std::sync::Arc;
pub(super) enum RequiredNetworkBoundary {
Shuffle,
Coalesce,
Broadcast,
}

/// Wraps an [ExecutionPlan] and annotates it with information about how many distributed tasks
@@ -182,13 +183,6 @@ fn _annotate_plan(
count += annotated_child.task_count.as_usize();
}
task_count = Desired(count);
} else if let Some(node) = plan.as_any().downcast_ref::<HashJoinExec>()
&& node.mode == PartitionMode::CollectLeft
{
// We cannot distribute CollectLeft HashJoinExec nodes yet. Once
// https://github.com/datafusion-contrib/datafusion-distributed/pull/229 lands,
// we can remove this check.
task_count = Maximum(1);
} else {
// The task count for this plan is decided by the biggest task count from the children; unless
// a child specifies a maximum task count, in that case, the maximum is respected. Some
@@ -335,6 +329,12 @@ fn required_network_boundary_below(parent: &dyn ExecutionPlan) -> Option<Require
return Some(RequiredNetworkBoundary::Coalesce);
}

if let Some(hash_join) = parent.as_any().downcast_ref::<HashJoinExec>() {
if hash_join.partition_mode() == &PartitionMode::CollectLeft {
return Some(RequiredNetworkBoundary::Broadcast);
}
}

None
}

@@ -410,11 +410,11 @@
"#;
let annotated = sql_to_annotated(query).await;
assert_snapshot!(annotated, @r"
CoalesceBatchesExec: task_count=Maximum(1)
HashJoinExec: task_count=Maximum(1)
CoalescePartitionsExec: task_count=Maximum(1)
DataSourceExec: task_count=Maximum(1)
DataSourceExec: task_count=Maximum(1)
CoalesceBatchesExec: task_count=Desired(3)
HashJoinExec: task_count=Desired(3), required_network_boundary=Broadcast
CoalescePartitionsExec: task_count=Desired(3)
DataSourceExec: task_count=Desired(3)
DataSourceExec: task_count=Desired(3)
")
}

@@ -445,8 +445,8 @@
"#;
let annotated = sql_to_annotated(query).await;
assert_snapshot!(annotated, @r"
CoalesceBatchesExec: task_count=Maximum(1)
HashJoinExec: task_count=Maximum(1)
CoalesceBatchesExec: task_count=Desired(1)
HashJoinExec: task_count=Desired(1), required_network_boundary=Broadcast
CoalescePartitionsExec: task_count=Maximum(1), required_network_boundary=Coalesce
ProjectionExec: task_count=Desired(2)
AggregateExec: task_count=Desired(2)
@@ -476,11 +476,11 @@
"#;
let annotated = sql_to_annotated(query).await;
assert_snapshot!(annotated, @r"
CoalesceBatchesExec: task_count=Maximum(1)
HashJoinExec: task_count=Maximum(1)
CoalescePartitionsExec: task_count=Maximum(1)
DataSourceExec: task_count=Maximum(1)
DataSourceExec: task_count=Maximum(1)
CoalesceBatchesExec: task_count=Desired(3)
HashJoinExec: task_count=Desired(3), required_network_boundary=Broadcast
CoalescePartitionsExec: task_count=Desired(3)
DataSourceExec: task_count=Desired(3)
DataSourceExec: task_count=Desired(3)
")
}
