@@ -48,6 +48,21 @@ def kms_key(cloudformation_outputs):
     yield cloudformation_outputs["KmsKeyArn"]


+@pytest.fixture(scope="module")
+def external_schema(cloudformation_outputs, database):
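+    # Create a Redshift Spectrum external schema backed by the Glue catalog database.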
+    region = cloudformation_outputs.get("Region")
+    sql = f"""
+    CREATE EXTERNAL SCHEMA IF NOT EXISTS aws_data_wrangler_external FROM data catalog
+    DATABASE '{database}'
+    IAM_ROLE '{cloudformation_outputs["RedshiftRole"]}'
+    REGION '{region}';
+    """
+    engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift")
+    with engine.connect() as con:
+        con.execute(sql)
+    yield "aws_data_wrangler_external"
+
+
 @pytest.fixture(scope="module")
 def workgroup0(bucket):
     wkg_name = "awswrangler_test_0"
@@ -957,3 +972,43 @@ def test_csv_compress(bucket, compression):
     for df3 in dfs:
         assert len(df3.columns) == 10
     wr.s3.delete_objects(path=path)
+
+
+def test_parquet_char_length(bucket, database, external_schema):
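+    # Write a char(3)-typed, date-partitioned Parquet dataset and read it back from S3, Athena and Redshift Spectrum.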
+    path = f"s3://{bucket}/test_parquet_char_length/"
+    table = "test_parquet_char_length"
+
+    df = pd.DataFrame({
+        "id": [1, 2],
+        "cchar": ["foo", "boo"],
+        "date": [datetime.date(2020, 1, 1), datetime.date(2020, 1, 2)]
+    })
+    wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        mode="overwrite",
+        partition_cols=["date"],
+        dtype={'cchar': 'char(3)'}
+    )
+
+    df2 = wr.s3.read_parquet(path, dataset=True)
+    assert len(df2.index) == 2
+    assert len(df2.columns) == 3
+    assert df2.id.sum() == 3
+
+    df2 = wr.athena.read_sql_table(table=table, database=database)
+    assert len(df2.index) == 2
+    assert len(df2.columns) == 3
+    assert df2.id.sum() == 3
+
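+    # The same table is also readable through the Redshift Spectrum external schema.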
+    engine = wr.catalog.get_engine("aws-data-wrangler-redshift")
+    df2 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema)
+    assert len(df2.index) == 2
+    assert len(df2.columns) == 3
+    assert df2.id.sum() == 3
+
+    wr.s3.delete_objects(path=path)
+    assert wr.catalog.delete_table_if_exists(database=database, table=table) is True