@@ -31,32 +31,43 @@ use datafusion::{
3131pub const BALLISTA_JOB_NAME : & str = "ballista.job.name" ;
3232/// Configuration key for standalone processing parallelism (`UInt16`; defaults to the available parallelism, or 1 if it cannot be determined).
3333pub const BALLISTA_STANDALONE_PARALLELISM : & str = "ballista.standalone.parallelism" ;
34+
35+ // Arrow IPC configuration
36+
37+ /// Configuration key for skipping redundant reader-side validation of Arrow IPC data that is already valid (`Boolean`, default `true`).
38+ pub const BALLISTA_ARROW_IPC_READER_SKIP_VALIDATION : & str =
39+ "ballista.arrow.ipc.reader_skip_validation" ;
40+
41+ // gRPC configuration
42+
43+ /// Configuration key for gRPC client connection timeout in seconds (`UInt64`, default 20).
44+ pub const BALLISTA_GRPC_CLIENT_CONNECT_TIMEOUT_SECONDS : & str =
45+ "ballista.grpc.client.connect_timeout_seconds" ;
46+ /// Configuration key for HTTP/2 keep-alive interval for gRPC clients in seconds (`UInt64`, default 300).
47+ pub const BALLISTA_GRPC_CLIENT_HTTP2_KEEPALIVE_INTERVAL_SECONDS : & str =
48+ "ballista.grpc.client.http2_keepalive_interval_seconds" ;
3449/// Configuration key for the maximum message size for gRPC clients (`UInt64`, default 16 * 1024 * 1024). NOTE(review): this key uses the flat `grpc_client_` form, unlike the other `grpc.client.*` keys.
3550pub const BALLISTA_GRPC_CLIENT_MAX_MESSAGE_SIZE : & str =
3651 "ballista.grpc_client_max_message_size" ;
37- /// Configuration key for maximum concurrent shuffle read requests.
38- pub const BALLISTA_SHUFFLE_READER_MAX_REQUESTS : & str =
39- "ballista.shuffle.max_concurrent_read_requests" ;
52+ /// Configuration key for TCP keep-alive interval for gRPC clients in seconds (`UInt64`, default 3600).
53+ pub const BALLISTA_GRPC_CLIENT_TCP_KEEPALIVE_SECONDS : & str =
54+ "ballista.grpc.client.tcp_keepalive_seconds" ;
55+ /// Configuration key for gRPC client request timeout in seconds (`UInt64`, default 20).
56+ pub const BALLISTA_GRPC_CLIENT_TIMEOUT_SECONDS : & str =
57+ "ballista.grpc.client.timeout_seconds" ;
58+
59+ // Shuffle reader configuration
60+
4061/// Configuration key to force remote reads even for local partitions (`Boolean`, default `false`).
4162pub const BALLISTA_SHUFFLE_READER_FORCE_REMOTE_READ : & str =
4263 "ballista.shuffle.force_remote_read" ;
64+ /// Configuration key for maximum concurrent shuffle read requests (`UInt64`, default 64).
65+ pub const BALLISTA_SHUFFLE_READER_MAX_REQUESTS : & str =
66+ "ballista.shuffle.max_concurrent_read_requests" ;
4367/// Configuration key to prefer Flight protocol over the block reader for remote shuffle reads (`Boolean`, default `false`).
4468pub const BALLISTA_SHUFFLE_READER_REMOTE_PREFER_FLIGHT : & str =
4569 "ballista.shuffle.remote_read_prefer_flight" ;
4670
47- /// Configuration key for gRPC client connection timeout in seconds.
48- pub const BALLISTA_GRPC_CLIENT_CONNECT_TIMEOUT_SECONDS : & str =
49- "ballista.grpc.client.connect_timeout_seconds" ;
50- /// Configuration key for gRPC client request timeout in seconds.
51- pub const BALLISTA_GRPC_CLIENT_TIMEOUT_SECONDS : & str =
52- "ballista.grpc.client.timeout_seconds" ;
53- /// Configuration key for TCP keep-alive interval for gRPC clients in seconds.
54- pub const BALLISTA_GRPC_CLIENT_TCP_KEEPALIVE_SECONDS : & str =
55- "ballista.grpc.client.tcp_keepalive_seconds" ;
56- /// Configuration key for HTTP/2 keep-alive interval for gRPC clients in seconds.
57- pub const BALLISTA_GRPC_CLIENT_HTTP2_KEEPALIVE_INTERVAL_SECONDS : & str =
58- "ballista.grpc.client.http2_keepalive_interval_seconds" ;
59-
6071/// Result type for configuration parsing operations; failures are reported as a `String`.
6172pub type ParseResult < T > = result:: Result < T , String > ;
6273use std:: sync:: LazyLock ;
@@ -69,38 +80,49 @@ static CONFIG_ENTRIES: LazyLock<HashMap<String, ConfigEntry>> = LazyLock::new(||
6980 ConfigEntry :: new( BALLISTA_STANDALONE_PARALLELISM . to_string( ) ,
7081 "Standalone processing parallelism" . to_string( ) ,
7182 DataType :: UInt16 , Some ( std:: thread:: available_parallelism( ) . map( |v| v. get( ) ) . unwrap_or( 1 ) . to_string( ) ) ) ,
83+
84+ // Arrow IPC configuration
85+ ConfigEntry :: new( BALLISTA_ARROW_IPC_READER_SKIP_VALIDATION . to_string( ) ,
86+ "Skip redundant reader validation of Arrow IPC valid data" . to_string( ) ,
87+ DataType :: Boolean ,
88+ Some ( true . to_string( ) ) ) ,
89+
90+ // gRPC configuration
91+ ConfigEntry :: new( BALLISTA_GRPC_CLIENT_CONNECT_TIMEOUT_SECONDS . to_string( ) ,
92+ "Connection timeout for gRPC client in seconds" . to_string( ) ,
93+ DataType :: UInt64 ,
94+ Some ( ( 20 ) . to_string( ) ) ) ,
95+ ConfigEntry :: new( BALLISTA_GRPC_CLIENT_HTTP2_KEEPALIVE_INTERVAL_SECONDS . to_string( ) ,
96+ "HTTP/2 keep-alive interval for gRPC client in seconds" . to_string( ) ,
97+ DataType :: UInt64 ,
98+ Some ( ( 300 ) . to_string( ) ) ) ,
7299 ConfigEntry :: new( BALLISTA_GRPC_CLIENT_MAX_MESSAGE_SIZE . to_string( ) ,
73100 "Configuration for max message size in gRPC clients" . to_string( ) ,
74101 DataType :: UInt64 ,
75102 Some ( ( 16 * 1024 * 1024 ) . to_string( ) ) ) ,
76- ConfigEntry :: new( BALLISTA_SHUFFLE_READER_MAX_REQUESTS . to_string( ) ,
77- "Maximum concurrent requests shuffle reader can process " . to_string( ) ,
103+ ConfigEntry :: new( BALLISTA_GRPC_CLIENT_TCP_KEEPALIVE_SECONDS . to_string( ) ,
104+ "TCP keep-alive interval for gRPC client in seconds" . to_string( ) ,
78105 DataType :: UInt64 ,
79- Some ( ( 64 ) . to_string( ) ) ) ,
106+ Some ( ( 3600 ) . to_string( ) ) ) ,
107+ ConfigEntry :: new( BALLISTA_GRPC_CLIENT_TIMEOUT_SECONDS . to_string( ) ,
108+ "Request timeout for gRPC client in seconds" . to_string( ) ,
109+ DataType :: UInt64 ,
110+ Some ( ( 20 ) . to_string( ) ) ) ,
111+
112+
113+ // Shuffle reader configuration
80114 ConfigEntry :: new( BALLISTA_SHUFFLE_READER_FORCE_REMOTE_READ . to_string( ) ,
81115 "Forces the shuffle reader to always read partitions via the Arrow Flight client, even when partitions are local to the node." . to_string( ) ,
82116 DataType :: Boolean ,
83117 Some ( ( false ) . to_string( ) ) ) ,
118+ ConfigEntry :: new( BALLISTA_SHUFFLE_READER_MAX_REQUESTS . to_string( ) ,
119+ "Maximum concurrent requests shuffle reader can process" . to_string( ) ,
120+ DataType :: UInt64 ,
121+ Some ( ( 64 ) . to_string( ) ) ) ,
84122 ConfigEntry :: new( BALLISTA_SHUFFLE_READER_REMOTE_PREFER_FLIGHT . to_string( ) ,
85123 "Forces the shuffle reader to use flight reader instead of block reader for remote read. Block reader usually has better performance and resource utilization" . to_string( ) ,
86124 DataType :: Boolean ,
87125 Some ( ( false ) . to_string( ) ) ) ,
88- ConfigEntry :: new( BALLISTA_GRPC_CLIENT_CONNECT_TIMEOUT_SECONDS . to_string( ) ,
89- "Connection timeout for gRPC client in seconds" . to_string( ) ,
90- DataType :: UInt64 ,
91- Some ( ( 20 ) . to_string( ) ) ) ,
92- ConfigEntry :: new( BALLISTA_GRPC_CLIENT_TIMEOUT_SECONDS . to_string( ) ,
93- "Request timeout for gRPC client in seconds" . to_string( ) ,
94- DataType :: UInt64 ,
95- Some ( ( 20 ) . to_string( ) ) ) ,
96- ConfigEntry :: new( BALLISTA_GRPC_CLIENT_TCP_KEEPALIVE_SECONDS . to_string( ) ,
97- "TCP keep-alive interval for gRPC client in seconds" . to_string( ) ,
98- DataType :: UInt64 ,
99- Some ( ( 3600 ) . to_string( ) ) ) ,
100- ConfigEntry :: new( BALLISTA_GRPC_CLIENT_HTTP2_KEEPALIVE_INTERVAL_SECONDS . to_string( ) ,
101- "HTTP/2 keep-alive interval for gRPC client in seconds" . to_string( ) ,
102- DataType :: UInt64 ,
103- Some ( ( 300 ) . to_string( ) ) )
104126 ] ;
105127 entries
106128 . into_iter ( )
@@ -264,6 +286,11 @@ impl BallistaConfig {
264286 self . get_bool_setting ( BALLISTA_SHUFFLE_READER_REMOTE_PREFER_FLIGHT )
265287 }
266288
289+ /// Returns whether redundant validation of already-valid Arrow IPC data should be skipped, as read from the `BALLISTA_ARROW_IPC_READER_SKIP_VALIDATION` setting.
290+ pub fn ballista_arrow_ipc_reader_skip_validation ( & self ) -> bool {
291+ self . get_bool_setting ( BALLISTA_ARROW_IPC_READER_SKIP_VALIDATION )
292+ }
293+
267294 fn get_usize_setting ( & self , key : & str ) -> usize {
268295 if let Some ( v) = self . settings . get ( key) {
269296 // infallible because we validate all configs in the constructor
0 commit comments