Skip to content

Commit fe502e8

Browse files
committed
docs: enhance Compression enum documentation and add default level method
1 parent 41e1742 commit fe502e8

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

python/datafusion/dataframe.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
# excerpt from deltalake
4242
# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
4343
class Compression(Enum):
44+
"""Enum representing the available compression types for Parquet files."""
45+
4446
UNCOMPRESSED = "uncompressed"
4547
SNAPPY = "snappy"
4648
GZIP = "gzip"
@@ -52,6 +54,17 @@ class Compression(Enum):
5254

5355
@classmethod
5456
def from_str(cls, value: str) -> "Compression":
57+
"""Convert a string to a Compression enum value.
58+
59+
Args:
60+
value (str): The string representation of the compression type.
61+
62+
Returns:
63+
Compression: The corresponding Compression enum value.
64+
65+
Raises:
66+
ValueError: If the string does not match any Compression enum value.
67+
"""
5568
try:
5669
return cls(value.lower())
5770
except ValueError:
@@ -60,6 +73,14 @@ def from_str(cls, value: str) -> "Compression":
6073
)
6174

6275
def get_default_level(self) -> int:
76+
"""Get the default compression level for the compression type.
77+
78+
Returns:
79+
int: The default compression level.
80+
81+
Raises:
82+
KeyError: If the compression type does not have a default level.
83+
"""
6384
# GZIP, BROTLI defaults from deltalake
6485
# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
6586
if self == Compression.GZIP:

python/tests/test_dataframe.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1113,7 +1113,9 @@ def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression)
11131113
df.write_parquet(str(path), compression=compression)
11141114

11151115

1116-
# test write_parquet with zstd, brotli default compression level, should complete without error
1116+
# Test write_parquet with zstd, brotli default compression level,
1117+
# i.e. don't specify compression level
1118+
# should complete without error
11171119
@pytest.mark.parametrize("compression", ["zstd", "brotli"])
11181120
def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression):
11191121
path = tmp_path

0 commit comments

Comments
 (0)