fix: remove LZO compression option and related test cases

kosiew · kosiew · commit 3c7b68a0a30b · 2025-01-13T15:36:33.000+08:00
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -699,10 +699,10 @@ def write_parquet(
                 - "snappy": Snappy compression.
                 - "gzip": Gzip compression.
                 - "brotli": Brotli compression.
-                - "lzo": LZO compression.
                 - "lz4": LZ4 compression.
                 - "lz4_raw": LZ4_RAW compression.
                 - "zstd": Zstandard compression.
+                Note: LZO is not yet implemented in arrow-rs and is therefore excluded.
             compression_level: Compression level to use. For ZSTD, the
                 recommended range is 1 to 22, with the default being 4. Higher levels
                 provide better compression but slower speed.
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
@@ -731,7 +731,9 @@ def test_optimized_logical_plan(aggregate_df):
 def test_execution_plan(aggregate_df):
     plan = aggregate_df.execution_plan()
 
-    expected = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n"  # noqa: E501
+    expected = (
+        "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n"  # noqa: E501
+    )
 
     assert expected == plan.display()
 
@@ -1126,16 +1128,6 @@ def test_write_compressed_parquet_default_compression_level(df, tmp_path, compre
     df.write_parquet(str(path), compression=compression)
 
 
-# lzo is not a valid Compression yet
-# https://github.com/apache/arrow-rs/issues/6970
-# Test write_parquet with lzo compression, should raise an error
-def test_write_compressed_parquet_lzo(df, tmp_path):
-    path = tmp_path / "test.parquet"
-
-    with pytest.raises(ValueError, match="lzo is not a valid Compression"):
-        df.write_parquet(str(path), compression="lzo")
-
-
 def test_dataframe_export(df) -> None:
     # Guarantees that we have the canonical implementation
     # reading our dataframe export