@@ -707,3 +707,32 @@ def test_to_parquet_nested_structs(glue_database, glue_table, path):
707707 wr .s3 .to_parquet (df = df , path = path , dataset = True , database = glue_database , table = glue_table )
708708 df3 = wr .athena .read_sql_query (sql = f"SELECT * FROM { glue_table } " , database = glue_database )
709709 assert df3 .shape == (2 , 2 )
710+
711+
def test_ignore_empty_files(glue_database, glue_table, path):
    """Check that a zero-byte object next to the dataset does not change what is read.

    A two-row frame is written as a parquet dataset (passing the Glue database
    and table), then an empty object is uploaded with a key built from the
    directory component of ``path``. The table is read back once through
    Athena and once through ``wr.s3.read_parquet_table``; both results must
    have the same shape as the frame that was written.
    """
    source_df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
    bucket, directory = wr._utils.parse_path(path)
    wr.s3.to_parquet(
        df=source_df,
        path=path,
        dataset=True,
        database=glue_database,
        table=glue_table,
    )

    # NOTE(review): the space after the directory component in the key and the
    # trailing space in the SQL text are kept exactly as found -- confirm
    # whether they are intentional.
    empty_key = f"{directory} to_be_ignored"
    s3_client = boto3.client("s3")
    s3_client.put_object(Body=b"", Bucket=bucket, Key=empty_key)

    query = f"SELECT * FROM {glue_table} "
    athena_df = wr.athena.read_sql_query(sql=query, database=glue_database)
    assert athena_df.shape == source_df.shape

    table_df = wr.s3.read_parquet_table(database=glue_database, table=glue_table)
    assert table_df.shape == source_df.shape
721+
722+
def test_suffix(glue_database, glue_table, path):
    """Read a table with ``filename_suffix=".parquet"`` while a non-parquet object exists.

    A two-row frame is written as a parquet dataset, then an object whose body
    is ``b"garbage"`` is uploaded with a key built from the directory component
    of ``path``. Reading the table with ``filename_suffix=".parquet"`` must
    yield a frame with the same shape as the one written.
    """
    source_df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
    bucket, directory = wr._utils.parse_path(path)
    wr.s3.to_parquet(
        df=source_df,
        path=path,
        dataset=True,
        database=glue_database,
        table=glue_table,
    )

    # NOTE(review): the space after the directory component in the key is kept
    # exactly as found -- confirm whether it is intentional.
    garbage_key = f"{directory} to_be_ignored"
    s3_client = boto3.client("s3")
    s3_client.put_object(Body=b"garbage", Bucket=bucket, Key=garbage_key)

    loaded_df = wr.s3.read_parquet_table(
        database=glue_database,
        table=glue_table,
        filename_suffix=".parquet",
    )
    assert loaded_df.shape == source_df.shape
730+
731+
def test_ignore_suffix(glue_database, glue_table, path):
    """Read a table with ``filename_ignore_suffix="ignored"`` while a garbage object exists.

    A two-row frame is written as a parquet dataset, then an object whose body
    is ``b"garbage"`` is uploaded with a key ending in ``to_be_ignored``.
    Reading the table with ``filename_ignore_suffix="ignored"`` must yield a
    frame with the same shape as the one written.
    """
    source_df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
    bucket, directory = wr._utils.parse_path(path)
    wr.s3.to_parquet(
        df=source_df,
        path=path,
        dataset=True,
        database=glue_database,
        table=glue_table,
    )

    # NOTE(review): the space after the directory component in the key is kept
    # exactly as found -- confirm whether it is intentional.
    garbage_key = f"{directory} to_be_ignored"
    s3_client = boto3.client("s3")
    s3_client.put_object(Body=b"garbage", Bucket=bucket, Key=garbage_key)

    loaded_df = wr.s3.read_parquet_table(
        database=glue_database,
        table=glue_table,
        filename_ignore_suffix="ignored",
    )
    assert loaded_df.shape == source_df.shape
0 commit comments