@@ -1295,9 +1295,7 @@ def test_athena_encryption(
12951295 assert len (df2 .columns ) == 2
12961296
12971297
1298- def test_athena_nested (bucket , database ):
1299- table = "test_athena_nested"
1300- path = f"s3://{ bucket } /{ table } /"
1298+ def test_athena_nested (path , database , table ):
13011299 df = pd .DataFrame (
13021300 {
13031301 "c0" : [[1 , 2 , 3 ], [4 , 5 , 6 ]],
@@ -2142,3 +2140,45 @@ def test_to_parquet_reverse_partitions(database, table, path, partition_cols):
21422140 assert df .c0 .sum () == df2 .c0 .sum ()
21432141 assert df .c1 .sum () == df2 .c1 .sum ()
21442142 assert df .c2 .sum () == df2 .c2 .sum ()
2143+
2144+
def test_to_parquet_nested_append(database, table, path):
    """Write a DataFrame with deeply nested list/struct columns twice and
    verify via Athena that the second write appends rows to the same table."""
    nested_df = pd.DataFrame(
        {
            "c0": [[1, 2, 3], [4, 5, 6]],
            "c1": [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
            "c2": [[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]],
            "c3": [[], [[[[[[[[1]]]]]]]]],
            "c4": [{"a": 1}, {"a": 1}],
            "c5": [{"a": {"b": {"c": [1, 2]}}}, {"a": {"b": {"c": [3, 4]}}}],
        }
    )
    query = f"SELECT c0, c1, c2, c4 FROM {table}"
    # First round creates the table (2 rows); second round appends (4 rows total).
    for expected_rows in (2, 4):
        written = wr.s3.to_parquet(df=nested_df, path=path, dataset=True, database=database, table=table)["paths"]
        wr.s3.wait_objects_exist(paths=written, use_threads=False)
        result = wr.athena.read_sql_query(sql=query, database=database)
        assert len(result.index) == expected_rows
        assert len(result.columns) == 4
2166+
2167+
def test_to_parquet_nested_cast(database, table, path):
    """Create a table with explicit nested dtype casts, append compatible
    data, and verify the combined row count via Athena."""
    # Initial write pins the catalog types explicitly — the empty c1 lists
    # carry no type on their own, so they are cast to array<string>.
    initial = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [[], []], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]})
    written = wr.s3.to_parquet(
        df=initial,
        path=path,
        dataset=True,
        database=database,
        table=table,
        dtype={"c0": "array<double>", "c1": "array<string>", "c2": "struct<a:bigint, b:double>"},
    )["paths"]
    wr.s3.wait_objects_exist(paths=written, use_threads=False)
    # Second write supplies real strings in c1, matching the declared cast.
    appended = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [["a"], ["b"]], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]})
    written = wr.s3.to_parquet(df=appended, path=path, dataset=True, database=database, table=table)["paths"]
    wr.s3.wait_objects_exist(paths=written, use_threads=False)
    result = wr.athena.read_sql_query(sql=f"SELECT c0, c2 FROM {table}", database=database)
    assert len(result.index) == 4
    assert len(result.columns) == 2
0 commit comments