@@ -2369,6 +2369,7 @@ def test_s3_overall_nan(bucket, database):
23692369
23702370def test_aurora_postgres_load_varchar (bucket , postgres_parameters ):
23712371 df = pd .DataFrame ({"id" : [1 , 2 , 3 ], "varchar3" : ["foo" , "boo" , "bar" ], "varchar1" : ["a" , "b" , "c" ]})
2372+ df ["varchar3" ] = df ["varchar3" ].astype ("string" )
23722373 path = f"s3://{ bucket } /test_aurora_postgres_load_varchar"
23732374 wr .pandas .to_aurora (dataframe = df ,
23742375 connection = "aws-data-wrangler-postgres" ,
@@ -2404,6 +2405,7 @@ def test_aurora_postgres_load_varchar(bucket, postgres_parameters):
24042405
24052406def test_aurora_mysql_load_varchar (bucket ):
24062407 df = pd .DataFrame ({"id" : [1 , 2 , 3 ], "varchar3" : ["foo" , "boo" , "bar" ], "varchar1" : ["a" , "b" , "c" ]})
2408+ df ["varchar3" ] = df ["varchar3" ].astype ("string" )
24072409 path = f"s3://{ bucket } /test_aurora_mysql_load_varchar"
24082410 wr .pandas .to_aurora (dataframe = df ,
24092411 connection = "aws-data-wrangler-mysql" ,
@@ -2430,3 +2432,51 @@ def test_aurora_mysql_load_varchar(bucket):
24302432 assert rows [1 ][2 ] == "b"
24312433 assert rows [2 ][2 ] == "c"
24322434 conn .close ()
2435+
2436+
def test_to_parquet_string(bucket, database):
    """Round-trip a DataFrame with nullable pandas dtypes through Parquet + Athena.

    Writes a frame whose columns use the extension dtypes "Int64" and
    "string" (including NA values), reads it back via read_sql_athena
    with ctas_approach=False, and asserts the round trip is lossless.
    """
    path = f"s3://{bucket}/test_to_parquet_string"
    # Start from (and finish with) a clean prefix so leftovers from
    # earlier runs cannot pollute the comparison.
    wr.s3.delete_objects(path=path)
    frame = pd.DataFrame({
        "id": [1, 2, 3, 4, 5],
        "c_str": ["foo", None, None, "bar", None],
    })
    # Nullable extension dtypes are the point of this test.
    frame["id"] = frame["id"].astype("Int64")
    frame["c_str"] = frame["c_str"].astype("string")
    wr.pandas.to_parquet(
        dataframe=frame,
        database=database,
        path=path,
        mode="overwrite",
        preserve_index=False,
        procs_cpu_bound=5,
        inplace=False,
    )
    # Give Glue/Athena time to register the new partitions/table.
    sleep(15)
    fetched = wr.pandas.read_sql_athena(
        database=database,
        sql="SELECT * FROM test_to_parquet_string ORDER BY id",
        ctas_approach=False,
    )
    wr.s3.delete_objects(path=path)
    assert frame.equals(fetched)
2459+
2460+
def test_to_csv_string(bucket, database):
    """Round-trip a DataFrame with nullable pandas dtypes through CSV + Athena.

    Writes a frame whose columns use the extension dtypes "Int64" and
    "string" (including NA values), reads it back via read_sql_athena
    with ctas_approach=False, and asserts the round trip is lossless.
    """
    path = f"s3://{bucket}/test_to_csv_string"
    # Start from (and finish with) a clean prefix so leftovers from
    # earlier runs cannot pollute the comparison.
    wr.s3.delete_objects(path=path)
    df = pd.DataFrame({
        "id": [1, 2, 3, 4, 5],
        "c_str": ["foo", None, None, "bar", None],
    })
    # Nullable extension dtypes are the point of this test.
    df["id"] = df["id"].astype("Int64")
    df["c_str"] = df["c_str"].astype("string")
    # BUG FIX: the original called wr.pandas.to_parquet here (copy-paste
    # from test_to_parquet_string), so the CSV write path was never
    # exercised despite the test's name and S3 prefix.
    wr.pandas.to_csv(dataframe=df,
                     database=database,
                     path=path,
                     mode="overwrite",
                     preserve_index=False,
                     procs_cpu_bound=5,
                     inplace=False)
    # Give Glue/Athena time to register the new table.
    sleep(5)
    df2 = wr.pandas.read_sql_athena(database=database,
                                    sql="SELECT * FROM test_to_csv_string ORDER BY id",
                                    ctas_approach=False)
    wr.s3.delete_objects(path=path)
    assert df.equals(df2)
0 commit comments