Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ class Compression(Enum):
GZIP = "gzip"
BROTLI = "brotli"
LZ4 = "lz4"
LZ0 = "lz0"
# TODO
# lzo is not implemented yet
# https://github.com/apache/arrow-rs/issues/6970
# LZO = "lzo"
ZSTD = "zstd"
LZ4_RAW = "lz4_raw"

Expand Down Expand Up @@ -696,10 +699,10 @@ def write_parquet(
- "snappy": Snappy compression.
- "gzip": Gzip compression.
- "brotli": Brotli compression.
- "lz0": LZ0 compression.
- "lz4": LZ4 compression.
- "lz4_raw": LZ4_RAW compression.
- "zstd": Zstandard compression.
Note: LZO is not yet implemented in arrow-rs and is therefore excluded.
compression_level: Compression level to use. For ZSTD, the
recommended range is 1 to 22, with the default being 4. Higher levels
provide better compression but slower speed.
Expand Down
3 changes: 3 additions & 0 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,9 @@ def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression)
df.write_parquet(str(path), compression=compression)


# TODO
# not testing lzo because it it not implemented yet
# https://github.com/apache/arrow-rs/issues/6970
@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"])
def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression):
# Test write_parquet with zstd, brotli, gzip default compression level,
Expand Down
2 changes: 1 addition & 1 deletion src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ impl PyDataFrame {
ZstdLevel::try_new(verify_compression_level(compression_level)? as i32)
.map_err(|e| PyValueError::new_err(format!("{e}")))?,
),
"lz0" => Compression::LZO,
"lzo" => Compression::LZO,
"lz4" => Compression::LZ4,
"lz4_raw" => Compression::LZ4_RAW,
"uncompressed" => Compression::UNCOMPRESSED,
Expand Down
Loading