@@ -29,6 +29,7 @@ use crate::{
 use arrow::array::RecordBatch;
 use arrow::datatypes::DataType as ArrowDataType;
 use datafusion::common::ScalarValue;
+use datafusion::config::Extensions;
 use datafusion::execution::memory_pool::{
     FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, UnboundedMemoryPool,
 };
@@ -41,7 +42,8 @@ use datafusion::{
 use datafusion_comet_proto::spark_operator::Operator;
 use futures::poll;
 use futures::stream::StreamExt;
-use jni::objects::{JByteBuffer, JMap};
+use jni::objects::JByteBuffer;
+use jni::objects::JMap;
 use jni::sys::JNI_FALSE;
 use jni::{
     errors::Result as JNIResult,
@@ -63,6 +65,7 @@ use std::time::{Duration, Instant};
 use std::{collections::HashMap, sync::Arc, task::Poll};
 use tokio::runtime::Runtime;
 
+use crate::config::CometNativeOptions;
 use crate::execution::fair_memory_pool::CometFairMemoryPool;
 use crate::execution::operators::ScanExec;
 use crate::execution::shuffle::{read_ipc_compressed, CompressionCodec};
@@ -127,8 +130,6 @@ struct ExecutionContext {
     pub explain_native: bool,
     /// Memory pool config
     pub memory_pool_config: MemoryPoolConfig,
-    /// Apache Spark config
-    pub spark_config: HashMap<String, String>,
 }
 
 #[derive(PartialEq, Eq)]
@@ -249,11 +250,28 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
         local_dirs.push(local_dir.into());
     }
 
-    // We need to keep the session context alive. Some session state like temporary
-    // dictionaries are stored in session context. If it is dropped, the temporary
+    // Read the Apache Spark runtime config and cache the values we need in Comet options
+    let spark_conf_map = JMap::from_env(&mut env, &spark_conf)?;
+    let mut spark_conf_iter = spark_conf_map.iter(&mut env)?;
+    let mut comet_options = CometNativeOptions::default();
+    while let Some((key, value)) = spark_conf_iter.next(&mut env)? {
+        let key: String = env.get_string(&JString::from(key)).unwrap().into();
+        if key == "spark.hadoop.fs.defaultFS" {
+            let value: String = env.get_string(&JString::from(value)).unwrap().into();
+            comet_options.default_fs = Some(value);
+        }
+    }
+
+    // We need to keep the session context alive. Some session state, such as
+    // temporary dictionaries, is stored in the session context.
+    // If it is dropped, the temporary
     // dictionaries will be dropped as well.
-    let session =
-        prepare_datafusion_session_context(batch_size as usize, memory_pool, local_dirs)?;
+    let session = prepare_datafusion_session_context(
+        batch_size as usize,
+        memory_pool,
+        local_dirs,
+        Arc::from(comet_options),
+    )?;
 
     let plan_creation_time = start.elapsed();
 
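Note: `CometNativeOptions` is used above but not defined in this diff (it is imported from `crate::config`). A minimal sketch consistent with this hunk, assuming only the `default_fs` field shown here, might look like the following; the real definition may carry more fields.

// Hypothetical sketch only; the real type lives in crate::config.
// Default is needed because the diff calls CometNativeOptions::default(),
// and a plain struct already satisfies the Any + Send + Sync bound that
// SessionConfig::with_extension requires.
#[derive(Debug, Default)]
pub struct CometNativeOptions {
    /// Value of `spark.hadoop.fs.defaultFS`, when set in the Spark conf.
    pub default_fs: Option<String>,
}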
@@ -263,17 +281,6 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
         None
     };
 
-    // Read Apache Spark runtime config
-    let spark_conf_map = JMap::from_env(&mut env, &spark_conf)?;
-    let mut spark_conf_iter = spark_conf_map.iter(&mut env)?;
-    let mut spark_conf = HashMap::new();
-
-    while let Some((key, value)) = spark_conf_iter.next(&mut env)? {
-        let key: String = env.get_string(&JString::from(key)).unwrap().into();
-        let value: String = env.get_string(&JString::from(value)).unwrap().into();
-        spark_conf.insert(key, value);
-    }
-
     let exec_context = Box::new(ExecutionContext {
         id,
         task_attempt_id,
@@ -291,7 +298,6 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
         debug_native: debug_native == 1,
         explain_native: explain_native == 1,
         memory_pool_config,
-        spark_config: spark_conf,
     });
 
     Ok(Box::into_raw(exec_context) as i64)
@@ -303,6 +309,7 @@ fn prepare_datafusion_session_context(
     batch_size: usize,
     memory_pool: Arc<dyn MemoryPool>,
     local_dirs: Vec<String>,
+    comet_opts: Arc<CometNativeOptions>,
 ) -> CometResult<SessionContext> {
     let disk_manager_config =
         DiskManagerConfig::NewSpecified(local_dirs.into_iter().map(PathBuf::from).collect());
@@ -323,7 +330,8 @@
             // maximum value is 1.0, so we set the threshold a little higher just
             // to be safe
            &ScalarValue::Float64(Some(1.1)),
-        );
+        )
+        .with_extension(comet_opts);
 
     #[allow(deprecated)]
     let runtime = RuntimeEnv::try_new(rt_config)?;
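Downstream, the options registered through `with_extension` can be read back from the session that `prepare_datafusion_session_context` returns. The sketch below is not part of this change: it only assumes DataFusion's `SessionConfig::get_extension` and the `default_fs` field introduced by this diff.

use datafusion::prelude::SessionContext;

use crate::config::CometNativeOptions;

// Sketch: fetch the Comet options registered via with_extension above and
// return the default filesystem, if one was provided by the Spark conf.
fn default_fs_from_session(session: &SessionContext) -> Option<String> {
    session
        .state()
        .config()
        .get_extension::<CometNativeOptions>()
        .and_then(|opts| opts.default_fs.clone())
}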