@@ -650,6 +650,74 @@ def test_athena_to_iceberg_merge_into(path: str, path2: str, glue_database: str,
650650 assert_pandas_equals (df_expected , df_out )
651651
652652
def test_athena_to_iceberg_merge_into_nulls(path: str, path2: str, glue_database: str, glue_table: str) -> None:
    """Check that a MERGE INTO on Iceberg matches rows whose merge columns hold nulls.

    The table is first loaded with four "insert" rows, some with a null in
    ``col1`` or ``col2``. A second write with ``merge_cols=["col1", "col2"]``
    carries three rows whose keys repeat existing ones (two of them containing
    a null) plus one new key. The expected table has the three repeated keys
    flagged "update" exactly once each (no duplicates for null-keyed rows),
    the untouched row still flagged "insert", and the new key appended.
    """
    dtypes = {"col1": "string", "col2": "float64", "action": "string"}

    def _frame(col1: list, col2: list, action: list) -> pd.DataFrame:
        # Build a frame with the fixed schema shared by every step of the test.
        return pd.DataFrame({"col1": col1, "col2": col2, "action": action}).astype(dtypes)

    # Initial load: every row is a plain insert.
    df = _frame(
        col1=["a", "a", "a", np.nan],
        col2=[0.0, 1.1, np.nan, 2.2],
        action=["insert", "insert", "insert", "insert"],
    )
    wr.athena.to_iceberg(
        df=df,
        database=glue_database,
        table=glue_table,
        table_location=path,
        temp_path=path2,
        keep_files=False,
    )

    # Perform MERGE INTO: three keys already exist (two contain a null), one is new.
    df2 = _frame(
        col1=["a", "a", np.nan, "b"],
        col2=[1.1, np.nan, 2.2, 3.3],
        action=["update", "update", "update", "insert"],
    )
    wr.athena.to_iceberg(
        df=df2,
        database=glue_database,
        table=glue_table,
        table_location=path,
        temp_path=path2,
        keep_files=False,
        merge_cols=["col1", "col2"],
    )

    # Expected output
    df_expected = _frame(
        col1=["a", "a", "a", np.nan, "b"],
        col2=[0.0, 1.1, np.nan, 2.2, 3.3],
        action=["insert", "update", "update", "update", "insert"],
    )

    # The quoted identifier must contain exactly the table name (no padding).
    df_out = wr.athena.read_sql_query(
        sql=f'SELECT * FROM "{glue_table}"',
        database=glue_database,
        ctas_approach=False,
        unload_approach=False,
    )

    # Row order coming back from Athena is not fixed, so sort both sides
    # on all columns before comparing.
    assert_pandas_equals(
        df_out.sort_values(df_out.columns.to_list()).reset_index(drop=True),
        df_expected.sort_values(df_expected.columns.to_list()).reset_index(drop=True),
    )
719+
720+
653721def test_athena_to_iceberg_merge_into_ignore (path : str , path2 : str , glue_database : str , glue_table : str ) -> None :
654722 df = pd .DataFrame ({"title" : ["Dune" , "Fargo" ], "year" : ["1984" , "1996" ], "gross" : [35_000_000 , 60_000_000 ]})
655723 df ["title" ] = df ["title" ].astype ("string" )
0 commit comments