@@ -36,16 +36,26 @@ use deltalake::operations::create::CreateBuilder;
3636use deltalake:: { DeltaTable , DeltaTableBuilder } ;
3737use futures:: StreamExt ;
3838use instrumented_object_store:: instrument_object_store;
39+ use std:: sync:: Mutex ;
3940use serde:: { Deserialize , Serialize } ;
4041use sqlx:: { PgPool , postgres:: PgPoolOptions } ;
4142use std:: fmt;
43+ use std:: sync:: OnceLock ;
4244use std:: { any:: Any , collections:: HashMap , sync:: Arc } ;
4345use tokio:: sync:: RwLock ;
4446use tokio_util:: sync:: CancellationToken ;
4547use tracing:: field:: Empty ;
4648use tracing:: { Instrument , debug, error, info, instrument, warn} ;
4749use url:: Url ;
4850
51+ /// Process-wide mutex, lazily created through `env_mutex()`, that serializes this module's environment variable writes.
52+ /// Required because delta-rs uses std::env::var() for AWS credential resolution,
53+ /// and std::env::set_var is unsafe in multi-threaded contexts. Only code that locks it is serialized.
54+ static ENV_MUTEX : OnceLock < Mutex < ( ) > > = OnceLock :: new ( ) ;
55+ fn env_mutex ( ) -> & ' static Mutex < ( ) > {
56+ ENV_MUTEX . get_or_init ( || Mutex :: new ( ( ) ) )
57+ }
58+
4959// Changed to support multiple tables per project: (project_id, table_name) -> DeltaTable
5060pub type ProjectConfigs = Arc < RwLock < HashMap < ( String , String ) , Arc < RwLock < DeltaTable > > > > > ;
5161
@@ -1136,17 +1146,18 @@ impl Database {
11361146 async fn create_or_load_delta_table (
11371147 & self , storage_uri : & str , storage_options : HashMap < String , String > , cached_store : Arc < dyn object_store:: ObjectStore > ,
11381148 ) -> Result < DeltaTable > {
1139- // SAFETY: delta-rs internally uses std::env::var() for AWS credential resolution.
1140- // While set_var is unsafe in multi-threaded contexts (potential data races with concurrent
1141- // env reads), this is acceptable here because:
1142- // 1. We only set AWS_* vars which are read by the AWS SDK during client initialization
1143- // 2. The values are consistent across calls (same credentials for same storage_options)
1144- // 3. Delta table creation happens early in request processing, before parallel query execution
1145- // 4. The alternative (forking processes or thread-local storage) adds significant complexity
1146- for ( key, value) in & storage_options {
1147- if key. starts_with ( "AWS_" ) {
1148- unsafe {
1149- std:: env:: set_var ( key, value) ;
1149+ // delta-rs uses std::env::var() for AWS credential resolution.
1150+ // We serialize access with ENV_MUTEX to prevent data races from concurrent set_var calls.
1151+ {
1152+ let _guard = env_mutex ( ) . lock ( ) ;
1153+ for ( key, value) in & storage_options {
1154+ if key. starts_with ( "AWS_" ) {
1155+ // SAFETY: ENV_MUTEX is held for this entire block, so set_var calls made through
1156+ // this path never overlap each other. NOTE(review): the mutex does not cover
1157+ // threads that read the environment without taking it -- confirm that is acceptable.
1158+ unsafe {
1159+ std:: env:: set_var ( key, value) ;
1160+ }
11501161 }
11511162 }
11521163 }
@@ -1194,9 +1205,8 @@ impl Database {
11941205
11951206 // Fallback to legacy batch queue if configured
11961207 let enable_queue = self . config . core . enable_batch_queue ;
1197- if !skip_queue && enable_queue && self . batch_queue . is_some ( ) {
1208+ if !skip_queue && enable_queue && let Some ( ref queue ) = self . batch_queue {
11981209 span. record ( "use_queue" , true ) ;
1199- let queue = self . batch_queue . as_ref ( ) . unwrap ( ) ;
12001210 for batch in batches {
12011211 if let Err ( e) = queue. queue ( batch) {
12021212 return Err ( anyhow:: anyhow!( "Queue error: {}" , e) ) ;
@@ -1724,12 +1734,19 @@ impl ProjectRoutingTable {
17241734 // delta table provider expects indices based on its own schema.
17251735 let delta_schema = provider. schema ( ) ;
17261736 let translated_projection = projection. map ( |proj| {
1727- proj. iter ( )
1728- . filter_map ( |& idx| {
1729- let col_name = self . schema . field ( idx) . name ( ) ;
1730- delta_schema. fields ( ) . iter ( ) . position ( |f| f. name ( ) == col_name)
1731- } )
1732- . collect :: < Vec < _ > > ( )
1737+ let mut translated = Vec :: with_capacity ( proj. len ( ) ) ;
1738+ for & idx in proj {
1739+ let col_name = self . schema . field ( idx) . name ( ) ;
1740+ if let Some ( delta_idx) = delta_schema. fields ( ) . iter ( ) . position ( |f| f. name ( ) == col_name) {
1741+ translated. push ( delta_idx) ;
1742+ } else {
1743+ warn ! (
1744+ "Column '{}' requested in projection but not found in Delta schema for table '{}'" ,
1745+ col_name, self . table_name
1746+ ) ;
1747+ }
1748+ }
1749+ translated
17331750 } ) ;
17341751
17351752 let delta_plan = provider. scan ( state, translated_projection. as_ref ( ) , filters, limit) . await ?;
0 commit comments