Skip to content

Commit e750e7b

Browse files
andygrove and claude committed
feat: add sort-based shuffle support to flight service
Update the flight server's `do_get` handler to detect sort-based shuffle files and, using the index file, read only the requested partition. This enables remote shuffle reads for sort-based shuffle.

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 0445c48 commit e750e7b

File tree

1 file changed

+40
-2
lines changed

1 file changed

+40
-2
lines changed

ballista/executor/src/flight_service.rs

Lines changed: 40 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,16 @@
2020
use datafusion::arrow::ipc::reader::StreamReader;
2121
use std::convert::TryFrom;
2222
use std::fs::File;
23+
use std::path::Path;
2324
use std::pin::Pin;
2425
use tokio_util::io::ReaderStream;
2526

2627
use arrow_flight::encode::FlightDataEncoderBuilder;
2728
use arrow_flight::error::FlightError;
2829
use ballista_core::error::BallistaError;
30+
use ballista_core::execution_plans::sort_shuffle::{
31+
get_index_path, is_sort_shuffle_output, stream_sort_shuffle_partition,
32+
};
2933
use ballista_core::serde::decode_protobuf;
3034
use ballista_core::serde::scheduler::Action as BallistaAction;
3135
use datafusion::arrow::ipc::CompressionType;
@@ -95,8 +99,42 @@ impl FlightService for BallistaFlightService {
9599
decode_protobuf(&ticket.ticket).map_err(|e| from_ballista_err(&e))?;
96100

97101
match &action {
98-
BallistaAction::FetchPartition { path, .. } => {
99-
debug!("FetchPartition reading {path}");
102+
BallistaAction::FetchPartition {
103+
path, partition_id, ..
104+
} => {
105+
debug!("FetchPartition reading partition {partition_id} from {path}");
106+
let data_path = Path::new(path);
107+
108+
// Check if this is a sort-based shuffle output
109+
if is_sort_shuffle_output(data_path) {
110+
debug!("Detected sort-based shuffle format for {path}");
111+
let index_path = get_index_path(data_path);
112+
let stream = stream_sort_shuffle_partition(
113+
data_path,
114+
&index_path,
115+
*partition_id,
116+
)
117+
.map_err(|e| from_ballista_err(&e))?;
118+
119+
let schema = stream.schema();
120+
// Map DataFusionError to FlightError
121+
let stream = stream.map_err(|e| FlightError::from(ArrowError::from(e)));
122+
123+
let write_options: IpcWriteOptions = IpcWriteOptions::default()
124+
.try_with_compression(Some(CompressionType::LZ4_FRAME))
125+
.map_err(|e| from_arrow_err(&e))?;
126+
let flight_data_stream = FlightDataEncoderBuilder::new()
127+
.with_schema(schema)
128+
.with_options(write_options)
129+
.build(stream)
130+
.map_err(|err| Status::from_error(Box::new(err)));
131+
132+
return Ok(Response::new(
133+
Box::pin(flight_data_stream) as Self::DoGetStream
134+
));
135+
}
136+
137+
// Standard hash-based shuffle - read the entire file
100138
let file = File::open(path)
101139
.map_err(|e| {
102140
BallistaError::General(format!(

0 commit comments

Comments
 (0)