Commit 1a37722

Add support for uint8, uint16, uint32 and uint64. #76
1 parent c693b26 commit 1a37722

File tree

2 files changed: +34 -3 lines changed

awswrangler/_data_types.py

Lines changed: 5 additions & 3 deletions

@@ -114,12 +114,14 @@ def pyarrow2athena(dtype: pa.DataType) -> str:  # pylint: disable=too-many-branches
     """Pyarrow to Athena data types conversion."""
     if pa.types.is_int8(dtype):
         return "tinyint"
-    if pa.types.is_int16(dtype):
+    if pa.types.is_int16(dtype) or pa.types.is_uint8(dtype):
         return "smallint"
-    if pa.types.is_int32(dtype):
+    if pa.types.is_int32(dtype) or pa.types.is_uint16(dtype):
         return "int"
-    if pa.types.is_int64(dtype):
+    if pa.types.is_int64(dtype) or pa.types.is_uint32(dtype):
         return "bigint"
+    if pa.types.is_uint64(dtype):
+        raise exceptions.UnsupportedType("There is no support for uint64, please consider int64 or uint32.")
     if pa.types.is_float32(dtype):
         return "float"
     if pa.types.is_float64(dtype):
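
For reference, a minimal sketch (not part of the commit) of how the new mapping behaves. It assumes awswrangler with this change and pyarrow are installed, and it imports the private helper pyarrow2athena directly from awswrangler._data_types as shown in the diff above:

import pyarrow as pa
from awswrangler._data_types import pyarrow2athena

# Each unsigned type is widened to the next larger signed Athena type,
# so the full unsigned value range stays representable.
print(pyarrow2athena(pa.uint8()))   # "smallint"
print(pyarrow2athena(pa.uint16()))  # "int"
print(pyarrow2athena(pa.uint32()))  # "bigint"

# uint64 has no wider signed counterpart and now raises exceptions.UnsupportedType.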

testing/test_awswrangler/test_data_lake.py

Lines changed: 29 additions & 0 deletions

@@ -1367,3 +1367,32 @@ def test_copy_replacing_filename(bucket):
     assert objs[0] == expected_file
     wr.s3.delete_objects(path=path)
     wr.s3.delete_objects(path=path2)
+
+
+def test_unsigned_parquet(bucket, database):
+    path = f"s3://{bucket}/test_unsigned_parquet/"
+    table = "test_unsigned_parquet"
+    wr.s3.delete_objects(path=path)
+    df = pd.DataFrame({"c0": [0, 0, (2 ** 8) - 1], "c1": [0, 0, (2 ** 16) - 1], "c2": [0, 0, (2 ** 32) - 1]})
+    df["c0"] = df.c0.astype("uint8")
+    df["c1"] = df.c1.astype("uint16")
+    df["c2"] = df.c2.astype("uint32")
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df = wr.athena.read_sql_table(table=table, database=database)
+    assert df.c0.sum() == (2 ** 8) - 1
+    assert df.c1.sum() == (2 ** 16) - 1
+    assert df.c2.sum() == (2 ** 32) - 1
+    schema = wr.s3.read_parquet_metadata(path=path)[0]
+    assert schema["c0"] == "smallint"
+    assert schema["c1"] == "int"
+    assert schema["c2"] == "bigint"
+    df = wr.s3.read_parquet(path=path)
+    assert df.c0.sum() == (2 ** 8) - 1
+    assert df.c1.sum() == (2 ** 16) - 1
+    assert df.c2.sum() == (2 ** 32) - 1
+
+    df = pd.DataFrame({"c0": [0, 0, (2 ** 64) - 1]})
+    df["c0"] = df.c0.astype("uint64")
+    with pytest.raises(wr.exceptions.UnsupportedType):
+        wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")
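
As the new error message suggests, uint64 columns have to be cast down before writing. A hypothetical workaround sketch (not part of the commit), assuming the column's values fit in the target type:

import pandas as pd

df = pd.DataFrame({"c0": [0, 1, 2]}).astype({"c0": "uint64"})
# Cast to int64 (or uint32 if the values are small enough) so the
# column maps to a supported Athena type before calling wr.s3.to_parquet.
df["c0"] = df["c0"].astype("int64")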
