Skip to content

Commit 07bda17

Browse files
takeknock, malachi-constant, jaidisido
authored
Enable s3.to_parquet to receive "zstd" compression type (#1369)
* Add tests for compression type
* feat: add compression type, zstd
* fix: remove debug code
* remove redundant cast to fix mypy error
* style: fix to pass validate.sh
* Revert "remove redundant cast to fix mypy error" — this reverts commit d540098.
* Refactor test

Co-authored-by: Lucas Hanson <[email protected]>
Co-authored-by: Abdel Jaidi <[email protected]>
1 parent 44bba32 commit 07bda17

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

awswrangler/s3/_write.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"bz2": ".bz2",
1717
"xz": ".xz",
1818
"zip": ".zip",
19+
"zstd": ".zstd",
1920
}
2021

2122

awswrangler/s3/_write_parquet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
258258
index : bool
259259
True to store the DataFrame index in file, otherwise False to ignore it.
260260
compression: str, optional
261-
Compression style (``None``, ``snappy``, ``gzip``).
261+
Compression style (``None``, ``snappy``, ``gzip``, ``zstd``).
262262
pyarrow_additional_kwargs : Optional[Dict[str, Any]]
263263
Additional parameters forwarded to pyarrow.
264264
e.g. pyarrow_additional_kwargs={'coerce_timestamps': 'ns', 'use_deprecated_int96_timestamps': False,
@@ -522,7 +522,7 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
522522

523523
# Evaluating compression
524524
if _COMPRESSION_2_EXT.get(compression, None) is None:
525-
raise exceptions.InvalidCompression(f"{compression} is invalid, please use None, 'snappy' or 'gzip'.")
525+
raise exceptions.InvalidCompression(f"{compression} is invalid, please use None, 'snappy', 'gzip' or 'zstd'.")
526526
compression_ext: str = _COMPRESSION_2_EXT[compression]
527527

528528
# Initializing defaults

tests/test_s3_parquet.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,10 +504,11 @@ def test_mixed_types_column(path) -> None:
504504
wr.s3.to_parquet(df, path, dataset=True, partition_cols=["par"])
505505

506506

507-
def test_parquet_plain(path) -> None:
507+
@pytest.mark.parametrize("compression", [None, "snappy", "gzip", "zstd"])
508+
def test_parquet_compression(path, compression) -> None:
508509
df = pd.DataFrame({"id": [1, 2, 3]}, dtype="Int64")
509510
path_file = f"{path}0.parquet"
510-
wr.s3.to_parquet(df=df, path=path_file, compression=None)
511+
wr.s3.to_parquet(df=df, path=path_file, compression=compression)
511512
df2 = wr.s3.read_parquet([path_file])
512513
assert df.equals(df2)
513514

0 commit comments

Comments
 (0)