Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 111 additions & 8 deletions src/plan/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,30 @@ impl PhysicalExtensionCodec for DistributedCodec {
buf: &mut Vec<u8>,
) -> datafusion::common::Result<()> {
if let Some(node) = node.as_any().downcast_ref::<ArrowFlightReadExec>() {
ArrowFlightReadExecProto {
let inner = ArrowFlightReadExecProto {
schema: Some(node.schema().try_into()?),
partitioning: Some(serialize_partitioning(
node.properties().output_partitioning(),
&DistributedCodec {},
)?),
stage_num: node.stage_num as u64,
}
.encode(buf)
.map_err(|err| proto_error(format!("{err}")))
};

let wrapper = DistributedExecProto {
node: Some(DistributedExecNode::ArrowFlightReadExec(inner)),
};

wrapper.encode(buf).map_err(|e| proto_error(format!("{e}")))
} else if let Some(node) = node.as_any().downcast_ref::<PartitionIsolatorExec>() {
PartitionIsolatorExecProto {
let inner = PartitionIsolatorExecProto {
partition_count: node.partition_count as u64,
}
.encode(buf)
.map_err(|err| proto_error(format!("{err}")))
};

let wrapper = DistributedExecProto {
node: Some(DistributedExecNode::PartitionIsolatorExec(inner)),
};

wrapper.encode(buf).map_err(|e| proto_error(format!("{e}")))
} else {
Err(proto_error(format!("Unexpected plan {}", node.name())))
}
Expand Down Expand Up @@ -138,3 +146,98 @@ pub struct ArrowFlightReadExecProto {
#[prost(uint64, tag = "3")]
stage_num: u64,
}

#[cfg(test)]
mod tests {
    //! Round-trip tests for `DistributedCodec`: every plan is encoded with
    //! `try_encode`, decoded again with `try_decode`, and the textual
    //! rendering of the result is compared against the original plan.

    use super::*;
    use datafusion::arrow::datatypes::{DataType, Field};
    use datafusion::{
        execution::registry::MemoryFunctionRegistry,
        physical_expr::{expressions::col, expressions::Column, Partitioning, PhysicalSortExpr},
        physical_plan::{displayable, sorts::sort::SortExec, union::UnionExec, ExecutionPlan},
    };

    /// One round-trip scenario: a label used in failure messages, the plan to
    /// encode, and the input plans passed to `try_decode`.
    type TestCase = (
        &'static str,
        Arc<dyn ExecutionPlan>,
        Vec<Arc<dyn ExecutionPlan>>,
    );

    /// Builds a schema with a single non-nullable `Int32` column called `name`.
    fn schema_i32(name: &str) -> Arc<Schema> {
        let column = Field::new(name, DataType::Int32, false);
        Arc::new(Schema::new(vec![column]))
    }

    /// Renders a plan as indented text so that two plans can be compared as strings.
    fn repr(plan: &Arc<dyn ExecutionPlan>) -> String {
        displayable(plan.as_ref()).indent(true).to_string()
    }

    /// Encodes the case's plan, decodes the bytes against the case's inputs, and
    /// asserts that both plans render identically.
    ///
    /// Encode/decode errors are propagated to the caller; a rendering mismatch
    /// panics with the case label in the message.
    fn assert_roundtrip(
        codec: &DistributedCodec,
        registry: &MemoryFunctionRegistry,
        (name, original, inputs): TestCase,
    ) -> datafusion::common::Result<()> {
        let mut encoded = Vec::new();
        codec.try_encode(original.clone(), &mut encoded)?;

        let decoded = codec.try_decode(&encoded, &inputs, registry)?;

        assert_eq!(
            repr(&original),
            repr(&decoded),
            "mismatch after round-trip for {name}"
        );
        Ok(())
    }

    #[test]
    fn distributed_codec_roundtrips() -> datafusion::common::Result<()> {
        let codec = DistributedCodec;
        let registry = MemoryFunctionRegistry::new();

        // ArrowFlightReadExec on its own, hash-partitioned on column "a".
        let single_flight: Arc<dyn ExecutionPlan> = Arc::new(ArrowFlightReadExec::new(
            Partitioning::Hash(vec![Arc::new(Column::new("a", 0))], 4),
            schema_i32("a"),
            0,
        ));

        // PartitionIsolatorExec -> ArrowFlightReadExec
        let flight_b = Arc::new(ArrowFlightReadExec::new(
            Partitioning::UnknownPartitioning(1),
            schema_i32("b"),
            0,
        ));
        let isolator_flight: Arc<dyn ExecutionPlan> =
            Arc::new(PartitionIsolatorExec::new(flight_b.clone(), 3));

        // PartitionIsolatorExec -> UnionExec(ArrowFlightReadExec, ArrowFlightReadExec)
        let schema_c = schema_i32("c");
        let union_lhs = Arc::new(ArrowFlightReadExec::new(
            Partitioning::RoundRobinBatch(2),
            schema_c.clone(),
            0,
        ));
        let union_rhs = Arc::new(ArrowFlightReadExec::new(
            Partitioning::RoundRobinBatch(2),
            schema_c.clone(),
            1,
        ));
        let union = Arc::new(UnionExec::new(vec![union_lhs.clone(), union_rhs.clone()]));
        let isolator_union: Arc<dyn ExecutionPlan> =
            Arc::new(PartitionIsolatorExec::new(union.clone(), 5));

        // PartitionIsolatorExec -> SortExec -> ArrowFlightReadExec
        let schema_d = schema_i32("d");
        let flight_d = Arc::new(ArrowFlightReadExec::new(
            Partitioning::UnknownPartitioning(1),
            schema_d.clone(),
            0,
        ));
        let sort_by_d = PhysicalSortExpr {
            expr: col("d", &schema_d)?,
            options: Default::default(),
        };
        let sort = Arc::new(SortExec::new(vec![sort_by_d].into(), flight_d.clone()));
        let isolator_sort_flight: Arc<dyn ExecutionPlan> =
            Arc::new(PartitionIsolatorExec::new(sort.clone(), 2));

        // Each isolator case passes the isolator's direct child as the decode input.
        let cases: Vec<TestCase> = vec![
            ("single_flight", single_flight, vec![]),
            ("isolator_flight", isolator_flight, vec![flight_b]),
            ("isolator_union", isolator_union, vec![union]),
            ("isolator_sort_flight", isolator_sort_flight, vec![sort]),
        ];

        // Run the cases in order, stopping at the first encode/decode error.
        cases
            .into_iter()
            .try_for_each(|case| assert_roundtrip(&codec, &registry, case))
    }
}