Skip to content

Commit 860431f

Browse files
feat: ignore nulls for container types (#2636)
* propagate the ignore_null flag for recursive calls to pyarrow2athena
* update tests
* lint

---------

Co-authored-by: Leon Luttenberger <[email protected]>
1 parent 2f3ca80 commit 860431f

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

awswrangler/_data_types.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,15 +49,17 @@ def pyarrow2athena( # noqa: PLR0911,PLR0912
4949
if pa.types.is_binary(dtype):
5050
return "binary"
5151
if pa.types.is_dictionary(dtype):
52-
return pyarrow2athena(dtype=dtype.value_type)
52+
return pyarrow2athena(dtype=dtype.value_type, ignore_null=ignore_null)
5353
if pa.types.is_decimal(dtype):
5454
return f"decimal({dtype.precision},{dtype.scale})"
5555
if pa.types.is_list(dtype):
56-
return f"array<{pyarrow2athena(dtype=dtype.value_type)}>"
56+
return f"array<{pyarrow2athena(dtype=dtype.value_type, ignore_null=ignore_null)}>"
5757
if pa.types.is_struct(dtype):
58-
return f"struct<{','.join([f'{f.name}:{pyarrow2athena(dtype=f.type)}' for f in dtype])}>"
58+
return (
59+
f"struct<{','.join([f'{f.name}:{pyarrow2athena(dtype=f.type, ignore_null=ignore_null)}' for f in dtype])}>"
60+
)
5961
if pa.types.is_map(dtype):
60-
return f"map<{pyarrow2athena(dtype=dtype.key_type)},{pyarrow2athena(dtype=dtype.item_type)}>"
62+
return f"map<{pyarrow2athena(dtype=dtype.key_type, ignore_null=ignore_null)},{pyarrow2athena(dtype=dtype.item_type, ignore_null=ignore_null)}>"
6163
if dtype == pa.null():
6264
if ignore_null:
6365
return ""

tests/unit/test_athena_parquet.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -376,14 +376,14 @@ def test_store_metadata_partitions_sample_dataset(glue_database, glue_table, pat
376376

377377
def test_store_metadata_ignore_null_columns(glue_database, glue_table, path):
378378
df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2_null": [None, None, None], "c3_null": [None, None, None]})
379-
wr.s3.to_parquet(df=df, path=path, dataset=True, dtype={"c2_null": "int", "c3_null": "int"})
379+
wr.s3.to_parquet(df=df, path=path, dataset=True, dtype={"c2_null": "int", "c3_null": "array<int>"})
380380
wr.s3.store_parquet_metadata(
381381
path=path,
382382
database=glue_database,
383383
table=glue_table,
384384
ignore_null=True,
385385
dataset=True,
386-
dtype={"c2_null": "int", "c3_null": "int"},
386+
dtype={"c2_null": "int", "c3_null": "array<int>"},
387387
)
388388

389389

0 commit comments

Comments
 (0)