1- use super :: combined:: CombinedRecordBatchStream ;
21use crate :: channel_manager_ext:: get_distributed_channel_resolver;
3- use crate :: common:: ComposedPhysicalExtensionCodec ;
42use crate :: config_extension_ext:: ContextGrpcMetadata ;
5- use crate :: errors:: tonic_status_to_datafusion_error ;
3+ use crate :: errors:: { map_flight_to_datafusion_error , map_status_to_datafusion_error } ;
64use crate :: flight_service:: { DoGet , StageKey } ;
75use crate :: plan:: DistributedCodec ;
86use crate :: stage:: { proto_from_stage, ExecutionStage } ;
9- use crate :: user_codec_ext:: get_distributed_user_codec;
107use crate :: ChannelResolver ;
118use arrow_flight:: decode:: FlightRecordBatchStream ;
129use arrow_flight:: error:: FlightError ;
@@ -20,15 +17,14 @@ use datafusion::physical_expr::{EquivalenceProperties, Partitioning};
2017use datafusion:: physical_plan:: execution_plan:: { Boundedness , EmissionType } ;
2118use datafusion:: physical_plan:: stream:: RecordBatchStreamAdapter ;
2219use datafusion:: physical_plan:: { DisplayAs , DisplayFormatType , ExecutionPlan , PlanProperties } ;
23- use futures:: { future , TryFutureExt , TryStreamExt } ;
20+ use futures:: { StreamExt , TryFutureExt , TryStreamExt } ;
2421use http:: Extensions ;
2522use prost:: Message ;
2623use std:: any:: Any ;
2724use std:: fmt:: Formatter ;
2825use std:: sync:: Arc ;
2926use tonic:: metadata:: MetadataMap ;
3027use tonic:: Request ;
31- use url:: Url ;
3228
3329/// This node has two variants.
3430/// 1. Pending: it acts as a placeholder for the distributed optimization step to mark it as ready.
@@ -187,115 +183,66 @@ impl ExecutionPlan for ArrowFlightReadExec {
187183 . session_config ( )
188184 . get_extension :: < ContextGrpcMetadata > ( ) ;
189185
190- let mut combined_codec = ComposedPhysicalExtensionCodec :: default ( ) ;
191- combined_codec. push ( DistributedCodec { } ) ;
192- if let Some ( ref user_codec) = get_distributed_user_codec ( context. session_config ( ) ) {
193- combined_codec. push_arc ( Arc :: clone ( user_codec) ) ;
194- }
186+ let codec = DistributedCodec :: new_combined_with_user ( context. session_config ( ) ) ;
195187
196- let child_stage_proto = proto_from_stage ( child_stage, & combined_codec ) . map_err ( |e| {
188+ let child_stage_proto = proto_from_stage ( child_stage, & codec ) . map_err ( |e| {
197189 internal_datafusion_err ! ( "ArrowFlightReadExec: failed to convert stage to proto: {e}" )
198190 } ) ?;
199191
200- let schema = child_stage. plan . schema ( ) ;
201-
202192 let child_stage_tasks = child_stage. tasks . clone ( ) ;
203193 let child_stage_num = child_stage. num as u64 ;
204194 let query_id = stage. query_id . to_string ( ) ;
205195
206- let stream = async move {
207- let futs = child_stage_tasks. iter ( ) . enumerate ( ) . map ( |( i, task) | {
208- let child_stage_proto = child_stage_proto. clone ( ) ;
209- let channel_resolver = channel_resolver. clone ( ) ;
210- let schema = schema. clone ( ) ;
211- let query_id = query_id. clone ( ) ;
212- let flight_metadata = flight_metadata
213- . as_ref ( )
214- . map ( |v| v. as_ref ( ) . clone ( ) )
215- . unwrap_or_default ( ) ;
216- let key = StageKey {
217- query_id,
218- stage_id : child_stage_num,
219- task_number : i as u64 ,
220- } ;
221- async move {
222- let url = task. url ( ) ?. ok_or ( internal_datafusion_err ! (
223- "ArrowFlightReadExec: task is unassigned, cannot proceed"
224- ) ) ?;
196+ let context_headers = flight_metadata
197+ . as_ref ( )
198+ . map ( |v| v. as_ref ( ) . clone ( ) )
199+ . unwrap_or_default ( ) ;
225200
226- let ticket_bytes = DoGet {
227- stage_proto : Some ( child_stage_proto) ,
201+ let stream = child_stage_tasks. into_iter ( ) . enumerate ( ) . map ( |( i, task) | {
202+ let channel_resolver = Arc :: clone ( & channel_resolver) ;
203+
204+ let ticket = Request :: from_parts (
205+ MetadataMap :: from_headers ( context_headers. 0 . clone ( ) ) ,
206+ Extensions :: default ( ) ,
207+ Ticket {
208+ ticket : DoGet {
209+ stage_proto : Some ( child_stage_proto. clone ( ) ) ,
228210 partition : partition as u64 ,
229- stage_key : Some ( key) ,
211+ stage_key : Some ( StageKey {
212+ query_id : query_id. clone ( ) ,
213+ stage_id : child_stage_num,
214+ task_number : i as u64 ,
215+ } ) ,
230216 task_number : i as u64 ,
231217 }
232218 . encode_to_vec ( )
233- . into ( ) ;
219+ . into ( ) ,
220+ } ,
221+ ) ;
234222
235- let ticket = Ticket {
236- ticket : ticket_bytes,
237- } ;
223+ async move {
224+ let url = task. url ( ) ?. ok_or ( internal_datafusion_err ! (
225+ "ArrowFlightReadExec: task is unassigned, cannot proceed"
226+ ) ) ?;
238227
239- stream_from_stage_task (
240- ticket,
241- flight_metadata,
242- & url,
243- schema. clone ( ) ,
244- & channel_resolver,
245- )
228+ let channel = channel_resolver. get_channel_for_url ( & url) . await ?;
229+ let stream = FlightServiceClient :: new ( channel)
230+ . do_get ( ticket)
246231 . await
247- }
248- } ) ;
232+ . map_err ( map_status_to_datafusion_error) ?
233+ . into_inner ( )
234+ . map_err ( |err| FlightError :: Tonic ( Box :: new ( err) ) ) ;
249235
250- let streams = future:: try_join_all ( futs) . await ?;
251-
252- let combined_stream = CombinedRecordBatchStream :: try_new ( schema, streams) ?;
253-
254- Ok ( combined_stream)
255- }
256- . try_flatten_stream ( ) ;
236+ Ok ( FlightRecordBatchStream :: new_from_flight_data ( stream)
237+ . map_err ( map_flight_to_datafusion_error) )
238+ }
239+ . try_flatten_stream ( )
240+ . boxed ( )
241+ } ) ;
257242
258243 Ok ( Box :: pin ( RecordBatchStreamAdapter :: new (
259244 self . schema ( ) ,
260- stream,
245+ futures :: stream:: select_all ( stream ) ,
261246 ) ) )
262247 }
263248}
264-
265- async fn stream_from_stage_task (
266- ticket : Ticket ,
267- metadata : ContextGrpcMetadata ,
268- url : & Url ,
269- schema : SchemaRef ,
270- channel_manager : & impl ChannelResolver ,
271- ) -> Result < SendableRecordBatchStream , DataFusionError > {
272- let channel = channel_manager. get_channel_for_url ( url) . await ?;
273-
274- let ticket = Request :: from_parts (
275- MetadataMap :: from_headers ( metadata. 0 ) ,
276- Extensions :: default ( ) ,
277- ticket,
278- ) ;
279-
280- let mut client = FlightServiceClient :: new ( channel) ;
281- let stream = client
282- . do_get ( ticket)
283- . await
284- . map_err ( |err| {
285- tonic_status_to_datafusion_error ( & err)
286- . unwrap_or_else ( || DataFusionError :: External ( Box :: new ( err) ) )
287- } ) ?
288- . into_inner ( )
289- . map_err ( |err| FlightError :: Tonic ( Box :: new ( err) ) ) ;
290-
291- let stream = FlightRecordBatchStream :: new_from_flight_data ( stream) . map_err ( |err| match err {
292- FlightError :: Tonic ( status) => tonic_status_to_datafusion_error ( & status)
293- . unwrap_or_else ( || DataFusionError :: External ( Box :: new ( status) ) ) ,
294- err => DataFusionError :: External ( Box :: new ( err) ) ,
295- } ) ;
296-
297- Ok ( Box :: pin ( RecordBatchStreamAdapter :: new (
298- schema. clone ( ) ,
299- stream,
300- ) ) )
301- }
0 commit comments