@@ -157,11 +157,11 @@ def test_catalog_get_databases(glue_database):
157157 assert db ["Description" ] == "AWS Data Wrangler Test Arena - Glue Database"
158158
159159
160- def test_catalog_versioning (path , glue_database , glue_table ):
160+ def test_catalog_versioning (path , glue_database , glue_table , glue_table2 ):
161161 wr .catalog .delete_table_if_exists (database = glue_database , table = glue_table )
162162 wr .s3 .delete_objects (path = path )
163163
164- # Version 0
164+ # Version 1 - Parquet
165165 df = pd .DataFrame ({"c0" : [1 , 2 ]})
166166 wr .s3 .to_parquet (df = df , path = path , dataset = True , database = glue_database , table = glue_table , mode = "overwrite" )[
167167 "paths"
@@ -172,7 +172,7 @@ def test_catalog_versioning(path, glue_database, glue_table):
172172 assert len (df .columns ) == 1
173173 assert str (df .c0 .dtype ).startswith ("Int" )
174174
175- # Version 1
175+ # Version 2 - Parquet
176176 df = pd .DataFrame ({"c1" : ["foo" , "boo" ]})
177177 wr .s3 .to_parquet (
178178 df = df ,
@@ -189,38 +189,56 @@ def test_catalog_versioning(path, glue_database, glue_table):
189189 assert len (df .columns ) == 1
190190 assert str (df .c1 .dtype ) == "string"
191191
192- # Version 2
192+ # Version 1 - CSV
193193 df = pd .DataFrame ({"c1" : [1.0 , 2.0 ]})
194194 wr .s3 .to_csv (
195195 df = df ,
196196 path = path ,
197197 dataset = True ,
198198 database = glue_database ,
199- table = glue_table ,
199+ table = glue_table2 ,
200200 mode = "overwrite" ,
201201 catalog_versioning = True ,
202202 index = False ,
203203 )
204- assert wr .catalog .get_table_number_of_versions (table = glue_table , database = glue_database ) == 3
205- df = wr .athena .read_sql_table (table = glue_table , database = glue_database )
204+ assert wr .catalog .get_table_number_of_versions (table = glue_table2 , database = glue_database ) == 1
205+ df = wr .athena .read_sql_table (table = glue_table2 , database = glue_database )
206206 assert len (df .index ) == 2
207207 assert len (df .columns ) == 1
208208 assert str (df .c1 .dtype ).startswith ("float" )
209209
210- # Version 3 (removing version 2 )
210+ # Version 1 - CSV (no evolution: catalog_versioning=False, so the version count stays at 1)
211211 df = pd .DataFrame ({"c1" : [True , False ]})
212212 wr .s3 .to_csv (
213213 df = df ,
214214 path = path ,
215215 dataset = True ,
216216 database = glue_database ,
217- table = glue_table ,
217+ table = glue_table2 ,
218218 mode = "overwrite" ,
219219 catalog_versioning = False ,
220220 index = False ,
221221 )
222- assert wr .catalog .get_table_number_of_versions (table = glue_table , database = glue_database ) == 3
223- df = wr .athena .read_sql_table (table = glue_table , database = glue_database )
222+ assert wr .catalog .get_table_number_of_versions (table = glue_table2 , database = glue_database ) == 1
223+ df = wr .athena .read_sql_table (table = glue_table2 , database = glue_database )
224+ assert len (df .index ) == 2
225+ assert len (df .columns ) == 1
226+ assert str (df .c1 .dtype ).startswith ("boolean" )
227+
228+ # Version 2 - CSV
229+ df = pd .DataFrame ({"c1" : [True , False ]})
230+ wr .s3 .to_csv (
231+ df = df ,
232+ path = path ,
233+ dataset = True ,
234+ database = glue_database ,
235+ table = glue_table2 ,
236+ mode = "overwrite" ,
237+ catalog_versioning = True ,
238+ index = False ,
239+ )
240+ assert wr .catalog .get_table_number_of_versions (table = glue_table2 , database = glue_database ) == 2
241+ df = wr .athena .read_sql_table (table = glue_table2 , database = glue_database )
224242 assert len (df .index ) == 2
225243 assert len (df .columns ) == 1
226244 assert str (df .c1 .dtype ).startswith ("boolean" )
0 commit comments