@@ -2208,3 +2208,225 @@ async fn test_remove_files_with_modified_selection_vector() -> Result<(), Box<dy
22082208 }
22092209 Ok ( ( ) )
22102210}
2211+
2212+ // Helper function to create a table with CDF enabled
2213+ async fn create_cdf_table (
2214+ table_name : & str ,
2215+ schema : SchemaRef ,
2216+ ) -> Result < ( Url , Arc < DefaultEngine < TokioBackgroundExecutor > > , TempDir ) , Box < dyn std:: error:: Error > >
2217+ {
2218+ let tmp_dir = tempdir ( ) ?;
2219+ let tmp_test_dir_url = Url :: from_directory_path ( tmp_dir. path ( ) ) . unwrap ( ) ;
2220+
2221+ let ( store, engine, table_location) = engine_store_setup ( table_name, Some ( & tmp_test_dir_url) ) ;
2222+
2223+ let table_url = create_table (
2224+ store. clone ( ) ,
2225+ table_location,
2226+ schema. clone ( ) ,
2227+ & [ ] ,
2228+ true , // use protocol 3.7
2229+ vec ! [ ] ,
2230+ vec ! [ "changeDataFeed" ] ,
2231+ )
2232+ . await ?;
2233+
2234+ Ok ( ( table_url, Arc :: new ( engine) , tmp_dir) )
2235+ }
2236+
2237+ // Helper function to write data to a table
2238+ async fn write_data_to_table (
2239+ table_url : & Url ,
2240+ engine : & Arc < DefaultEngine < TokioBackgroundExecutor > > ,
2241+ schema : SchemaRef ,
2242+ values : Vec < i32 > ,
2243+ ) -> Result < Version , Box < dyn std:: error:: Error > > {
2244+ let snapshot = Snapshot :: builder_for ( table_url. clone ( ) ) . build ( engine. as_ref ( ) ) ?;
2245+ let mut txn = snapshot
2246+ . transaction ( Box :: new ( FileSystemCommitter :: new ( ) ) ) ?
2247+ . with_engine_info ( "test" ) ;
2248+
2249+ add_files_to_transaction ( & mut txn, engine, schema, values) . await ?;
2250+
2251+ let result = txn. commit ( engine. as_ref ( ) ) ?;
2252+ match result {
2253+ CommitResult :: CommittedTransaction ( committed) => Ok ( committed. commit_version ( ) ) ,
2254+ _ => panic ! ( "Transaction should be committed" ) ,
2255+ }
2256+ }
2257+
2258+ // Helper function to add files to an existing transaction
2259+ async fn add_files_to_transaction (
2260+ txn : & mut delta_kernel:: transaction:: Transaction ,
2261+ engine : & Arc < DefaultEngine < TokioBackgroundExecutor > > ,
2262+ schema : SchemaRef ,
2263+ values : Vec < i32 > ,
2264+ ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
2265+ let data = RecordBatch :: try_new (
2266+ Arc :: new ( schema. as_ref ( ) . try_into_arrow ( ) ?) ,
2267+ vec ! [ Arc :: new( Int32Array :: from( values) ) ] ,
2268+ ) ?;
2269+
2270+ let write_context = Arc :: new ( txn. get_write_context ( ) ) ;
2271+ let add_files_metadata = engine
2272+ . write_parquet (
2273+ & ArrowEngineData :: new ( data) ,
2274+ write_context. as_ref ( ) ,
2275+ HashMap :: new ( ) ,
2276+ )
2277+ . await ?;
2278+ txn. add_files ( add_files_metadata) ;
2279+ Ok ( ( ) )
2280+ }
2281+
2282+ #[ tokio:: test]
2283+ async fn test_cdf_write_all_adds_succeeds ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
2284+ // This test verifies that add-only transactions work with CDF enabled
2285+ let _ = tracing_subscriber:: fmt:: try_init ( ) ;
2286+
2287+ let schema = Arc :: new ( StructType :: try_new ( vec ! [ StructField :: nullable(
2288+ "number" ,
2289+ DataType :: INTEGER ,
2290+ ) ] ) ?) ;
2291+
2292+ let ( table_url, engine, _tmp_dir) =
2293+ create_cdf_table ( "test_cdf_all_adds" , schema. clone ( ) ) . await ?;
2294+
2295+ // Add files - this should succeed
2296+ let version = write_data_to_table ( & table_url, & engine, schema, vec ! [ 1 , 2 , 3 ] ) . await ?;
2297+ assert_eq ! ( version, 1 ) ;
2298+
2299+ Ok ( ( ) )
2300+ }
2301+
2302+ #[ tokio:: test]
2303+ async fn test_cdf_write_all_removes_succeeds ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
2304+ // This test verifies that remove-only transactions work with CDF enabled
2305+ let _ = tracing_subscriber:: fmt:: try_init ( ) ;
2306+
2307+ let schema = Arc :: new ( StructType :: try_new ( vec ! [ StructField :: nullable(
2308+ "number" ,
2309+ DataType :: INTEGER ,
2310+ ) ] ) ?) ;
2311+
2312+ let ( table_url, engine, _tmp_dir) =
2313+ create_cdf_table ( "test_cdf_all_removes" , schema. clone ( ) ) . await ?;
2314+
2315+ // First, add some data
2316+ write_data_to_table ( & table_url, & engine, schema, vec ! [ 1 , 2 , 3 ] ) . await ?;
2317+
2318+ // Now remove the files
2319+ let snapshot = Snapshot :: builder_for ( table_url. clone ( ) ) . build ( engine. as_ref ( ) ) ?;
2320+ let mut txn = snapshot
2321+ . clone ( )
2322+ . transaction ( Box :: new ( FileSystemCommitter :: new ( ) ) ) ?
2323+ . with_engine_info ( "cdf remove test" )
2324+ . with_data_change ( true ) ;
2325+
2326+ let scan = snapshot. scan_builder ( ) . build ( ) ?;
2327+ let scan_metadata = scan. scan_metadata ( engine. as_ref ( ) ) ?. next ( ) . unwrap ( ) ?;
2328+ let ( data, selection_vector) = scan_metadata. scan_files . into_parts ( ) ;
2329+ txn. remove_files ( FilteredEngineData :: try_new ( data, selection_vector) ?) ;
2330+
2331+ // This should succeed - remove-only transactions are allowed with CDF
2332+ let result = txn. commit ( engine. as_ref ( ) ) ?;
2333+ match result {
2334+ CommitResult :: CommittedTransaction ( committed) => {
2335+ assert_eq ! ( committed. commit_version( ) , 2 ) ;
2336+ }
2337+ _ => panic ! ( "Transaction should be committed" ) ,
2338+ }
2339+
2340+ Ok ( ( ) )
2341+ }
2342+
2343+ #[ tokio:: test]
2344+ async fn test_cdf_write_mixed_no_data_change_succeeds ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
2345+ // This test verifies that mixed add+remove transactions work when dataChange=false.
2346+ // It's allowed because the transaction does not contain any logical data changes.
2347+ // This can happen when a table is being optimized/compacted.
2348+ let _ = tracing_subscriber:: fmt:: try_init ( ) ;
2349+
2350+ let schema = Arc :: new ( StructType :: try_new ( vec ! [ StructField :: nullable(
2351+ "number" ,
2352+ DataType :: INTEGER ,
2353+ ) ] ) ?) ;
2354+
2355+ let ( table_url, engine, _tmp_dir) =
2356+ create_cdf_table ( "test_cdf_mixed_no_data_change" , schema. clone ( ) ) . await ?;
2357+
2358+ // First, add some data
2359+ write_data_to_table ( & table_url, & engine, schema. clone ( ) , vec ! [ 1 , 2 , 3 ] ) . await ?;
2360+
2361+ // Now create a transaction with both add AND remove files, but dataChange=false
2362+ let snapshot = Snapshot :: builder_for ( table_url. clone ( ) ) . build ( engine. as_ref ( ) ) ?;
2363+ let mut txn = snapshot
2364+ . clone ( )
2365+ . transaction ( Box :: new ( FileSystemCommitter :: new ( ) ) ) ?
2366+ . with_engine_info ( "cdf mixed test" )
2367+ . with_data_change ( false ) ; // dataChange=false is key here
2368+
2369+ // Add new files
2370+ add_files_to_transaction ( & mut txn, & engine, schema, vec ! [ 4 , 5 , 6 ] ) . await ?;
2371+
2372+ // Also remove existing files
2373+ let scan = snapshot. scan_builder ( ) . build ( ) ?;
2374+ let scan_metadata = scan. scan_metadata ( engine. as_ref ( ) ) ?. next ( ) . unwrap ( ) ?;
2375+ let ( data, selection_vector) = scan_metadata. scan_files . into_parts ( ) ;
2376+ txn. remove_files ( FilteredEngineData :: try_new ( data, selection_vector) ?) ;
2377+
2378+ // This should succeed - mixed operations are allowed when dataChange=false
2379+ let result = txn. commit ( engine. as_ref ( ) ) ?;
2380+ match result {
2381+ CommitResult :: CommittedTransaction ( committed) => {
2382+ assert_eq ! ( committed. commit_version( ) , 2 ) ;
2383+ }
2384+ _ => panic ! ( "Transaction should be committed" ) ,
2385+ }
2386+
2387+ Ok ( ( ) )
2388+ }
2389+
2390+ #[ tokio:: test]
2391+ async fn test_cdf_write_mixed_with_data_change_fails ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
2392+ // This test verifies that mixed add+remove transactions fail with helpful error when dataChange=true
2393+ let _ = tracing_subscriber:: fmt:: try_init ( ) ;
2394+
2395+ let schema = Arc :: new ( StructType :: try_new ( vec ! [ StructField :: nullable(
2396+ "number" ,
2397+ DataType :: INTEGER ,
2398+ ) ] ) ?) ;
2399+
2400+ let ( table_url, engine, _tmp_dir) =
2401+ create_cdf_table ( "test_cdf_mixed_with_data_change" , schema. clone ( ) ) . await ?;
2402+
2403+ // First, add some data
2404+ write_data_to_table ( & table_url, & engine, schema. clone ( ) , vec ! [ 1 , 2 , 3 ] ) . await ?;
2405+
2406+ // Now create a transaction with both add AND remove files with dataChange=true
2407+ let snapshot = Snapshot :: builder_for ( table_url. clone ( ) ) . build ( engine. as_ref ( ) ) ?;
2408+ let mut txn = snapshot
2409+ . clone ( )
2410+ . transaction ( Box :: new ( FileSystemCommitter :: new ( ) ) ) ?
2411+ . with_engine_info ( "cdf mixed fail test" )
2412+ . with_data_change ( true ) ; // dataChange=true - this should fail
2413+
2414+ // Add new files
2415+ add_files_to_transaction ( & mut txn, & engine, schema, vec ! [ 4 , 5 , 6 ] ) . await ?;
2416+
2417+ // Also remove existing files
2418+ let scan = snapshot. scan_builder ( ) . build ( ) ?;
2419+ let scan_metadata = scan. scan_metadata ( engine. as_ref ( ) ) ?. next ( ) . unwrap ( ) ?;
2420+ let ( data, selection_vector) = scan_metadata. scan_files . into_parts ( ) ;
2421+ txn. remove_files ( FilteredEngineData :: try_new ( data, selection_vector) ?) ;
2422+
2423+ // This should fail with our new error message
2424+ assert_result_error_with_message (
2425+ txn. commit ( engine. as_ref ( ) ) ,
2426+ "Cannot add and remove data in the same transaction when Change Data Feed is enabled (delta.enableChangeDataFeed = true). \
2427+ This would require writing CDC files for DML operations, which is not yet supported. \
2428+ Consider using separate transactions: one to add files, another to remove files."
2429+ ) ;
2430+
2431+ Ok ( ( ) )
2432+ }
0 commit comments