@@ -576,8 +576,19 @@ def test_athena_time_zone(glue_database):
576576 assert df ["value" ][0 ].year == datetime .datetime .utcnow ().year
577577
578578
579- @pytest .mark .xfail (raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table" )
580- def test_category (path , glue_table , glue_database ):
579+ @pytest .mark .parametrize (
580+ "ctas_approach" ,
581+ [
582+ pytest .param (False ),
583+ pytest .param (
584+ True ,
585+ marks = pytest .mark .xfail (
586+ raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table"
587+ ),
588+ ),
589+ ],
590+ )
591+ def test_category (path : str , glue_table : str , glue_database : str , ctas_approach : bool ) -> None :
581592 df = get_df_category ()
582593 wr .s3 .to_parquet (
583594 df = df ,
@@ -588,37 +599,42 @@ def test_category(path, glue_table, glue_database):
588599 mode = "overwrite" ,
589600 partition_cols = ["par0" , "par1" ],
590601 )
591- df2 = wr .s3 .read_parquet (
592- path = path ,
593- dataset = True ,
594- pyarrow_additional_kwargs = {
595- "categories" : [c for c in df .columns if c not in ["par0" , "par1" ]],
596- "strings_to_categorical" : True ,
597- },
598- )
599- ensure_data_types_category (df2 )
600- df2 = wr .athena .read_sql_query (f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ))
601- ensure_data_types_category (df2 )
602- df2 = wr .athena .read_sql_table (table = glue_table , database = glue_database , categories = list (df .columns ))
603- ensure_data_types_category (df2 )
602+
604603 df2 = wr .athena .read_sql_query (
605- f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ), ctas_approach = False
604+ f"SELECT * FROM { glue_table } " , database = glue_database , categories = list (df .columns ), ctas_approach = ctas_approach
606605 )
607606 ensure_data_types_category (df2 )
608- dfs = wr .athena .read_sql_query (
609- f"SELECT * FROM { glue_table } " ,
607+
608+
609+ @pytest .mark .parametrize (
610+ "ctas_approach" ,
611+ [
612+ pytest .param (False ),
613+ pytest .param (
614+ True ,
615+ marks = pytest .mark .xfail (
616+ raises = NotImplementedError , reason = "Unable to create pandas categorical from pyarrow table"
617+ ),
618+ ),
619+ ],
620+ )
621+ def test_category_chunked (path : str , glue_table : str , glue_database : str , ctas_approach : bool ) -> None :
622+ df = get_df_category ()
623+ wr .s3 .to_parquet (
624+ df = df ,
625+ path = path ,
626+ dataset = True ,
610627 database = glue_database ,
611- categories = list ( df . columns ) ,
612- ctas_approach = False ,
613- chunksize = 1 ,
628+ table = glue_table ,
629+ mode = "overwrite" ,
630+ partition_cols = [ "par0" , "par1" ] ,
614631 )
615- for df2 in dfs :
616- ensure_data_types_category (df2 )
632+
617633 dfs = wr .athena .read_sql_query (
618634 f"SELECT * FROM { glue_table } " ,
619635 database = glue_database ,
620636 categories = list (df .columns ),
621- ctas_approach = True ,
637+ ctas_approach = ctas_approach ,
622638 chunksize = 1 ,
623639 )
624640 for df2 in dfs :
0 commit comments