 import logging
 from datetime import datetime, date
+from time import sleep

 import pytest
 import boto3
@@ -88,6 +89,8 @@ def test_read_csv(session, bucket, sample_name):
8889 [("snappy" , []), ("gzip" , ["date" , "value" ]), ("none" , ["time" ])],
8990)
9091def test_create_glue_table_parquet (session , bucket , database , compression , partition_by ):
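+    # Assumption: delete any objects left under this prefix by earlier runs so the test starts clean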
+    s3_path = f"s3://{bucket}/test"
+    session.s3.delete_objects(path=s3_path)
     path = "data_samples/nano.csv"
     schema = "id INTEGER, name STRING, value DOUBLE, date DATE, time TIMESTAMP"
     timestamp_format = "yyyy-MM-dd"
@@ -100,12 +103,12 @@ def test_create_glue_table_parquet(session, bucket, database, compression, partition_by):
         .withColumn("my_array", array(lit(0), lit(1))) \
         .withColumn("my_struct", struct(lit("text").alias("a"), lit(1).alias("b"))) \
         .withColumn("my_map", create_map(lit("k0"), lit(1.0), lit("k1"), lit(2.0)))
-    s3_path = f"s3://{bucket}/test"
     dataframe.write \
         .mode("overwrite") \
         .format("parquet") \
         .partitionBy(partition_by) \
         .save(compression=compression, path=s3_path)
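+    # Assumption: short pause so the freshly written Parquet objects are visible before the Glue table is created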
+    sleep(10)
     session.spark.create_glue_table(dataframe=dataframe,
                                     file_format="parquet",
                                     partition_by=partition_by,
@@ -119,6 +122,7 @@ def test_create_glue_table_parquet(session, bucket, database, compression, partition_by):
     assert pandas_df.iloc[0]["counter"] == 5
     query = "select my_array[1] as foo, my_struct.a as boo, my_map['k0'] as bar from test limit 1"
     pandas_df = session.pandas.read_sql_athena(sql=query, database=database)
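+    # Assumption: tear down the test prefix once the query results are fetched, so no objects leak between runs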
+    session.s3.delete_objects(path=s3_path)
     assert pandas_df.iloc[0]["foo"] == 0
     assert pandas_df.iloc[0]["boo"] == "text"
     assert pandas_df.iloc[0]["bar"] == 1.0