Add CSV write unit test and fix how DataFrameWriteOptions are passed to the writers

timsaucer · timsaucer · commit d416a6830b7b · 2025-10-07T09:02:21.000-04:00
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -941,7 +941,7 @@ def write_csv(
             with_header: If true, output the CSV header row.
             write_options: Options that impact how the DataFrame is written.
         """
-        self.df.write_csv(str(path), with_header, write_options=write_options)
+        self.df.write_csv(str(path), with_header, write_options._raw_write_options)
 
     @overload
     def write_parquet(
@@ -1014,7 +1014,10 @@ def write_parquet(
             compression_level = compression.get_default_level()
 
         self.df.write_parquet(
-            str(path), compression.value, compression_level, write_options
+            str(path),
+            compression.value,
+            compression_level,
+            write_options._raw_write_options,
         )
 
     def write_parquet_with_options(
@@ -1071,7 +1074,7 @@ def write_parquet_with_options(
             str(path),
             options_internal,
             column_specific_options_internal,
-            write_options,
+            write_options._raw_write_options,
         )
 
     def write_json(
@@ -1085,7 +1088,7 @@ def write_json(
             path: Path of the JSON file to write.
             write_options: Options that impact how the DataFrame is written.
         """
-        self.df.write_json(str(path), write_options=write_options)
+        self.df.write_json(str(path), write_options=write_options._raw_write_options)
 
     def write_table(
         self, table_name: str, write_options: DataFrameWriteOptions | None = None
@@ -1096,7 +1099,7 @@ def write_table(
         Not all table providers support writing operations. See the individual
         implementations for details.
         """
-        self.df.write_table(table_name, write_options)
+        self.df.write_table(table_name, write_options._raw_write_options)
 
     def to_arrow_table(self) -> pa.Table:
         """Execute the :py:class:`DataFrame` and convert it into an Arrow Table.
@@ -1275,11 +1278,11 @@ def __init__(
         """Instantiate writer options for DataFrame."""
         write_options = DataFrameWriteOptionsInternal()
         if insert_operation is not None:
-            write_options = write_options.with_insert_operation(insert_operation)
+            write_options = write_options.with_insert_operation(insert_operation.value)
         write_options = write_options.with_single_file_output(single_file_output)
         if partition_by is not None:
             if isinstance(partition_by, str):
-                partition_by = [single_file_output]
+                partition_by = [partition_by]
             write_options = write_options.with_partition_by(partition_by)
 
         sort_by_raw = sort_list_to_raw_sort_list(sort_by)
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
@@ -40,6 +40,7 @@
 from datafusion import (
     functions as f,
 )
+from datafusion.dataframe import DataFrameWriteOptions
 from datafusion.dataframe_formatter import (
     DataFrameHtmlFormatter,
     configure_formatter,
@@ -1830,6 +1831,33 @@ def test_write_csv(ctx, df, tmp_path, path_to_str):
     assert result == expected
 
 
+@pytest.mark.parametrize(
+    ("sort_by", "expected_a"),
+    [
+        pytest.param(None, [1, 2, 3], id="unsorted"),
+        pytest.param(column("c"), [2, 1, 3], id="single_column_expr"),
+        pytest.param(
+            column("a").sort(ascending=False), [3, 2, 1], id="single_sort_expr"
+        ),
+        pytest.param([column("c"), column("b")], [2, 1, 3], id="list_col_expr"),
+        pytest.param(
+            [column("c").sort(ascending=False), column("b").sort(ascending=False)],
+            [3, 1, 2],
+            id="list_sort_expr",
+        ),
+    ],
+)
+def test_write_csv_with_options(ctx, df, tmp_path, sort_by, expected_a) -> None:
+    """Rows read back from the CSV follow the ``sort_by`` write option."""
+    write_options = DataFrameWriteOptions(sort_by=sort_by)
+    df.write_csv(tmp_path, with_header=True, write_options=write_options)
+
+    ctx.register_csv("csv", tmp_path)
+    result = ctx.table("csv").to_pydict()["a"]
+
+    assert result == expected_a
+
+
 @pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_json(ctx, df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path
diff --git a/src/dataframe.rs b/src/dataframe.rs
@@ -746,10 +746,10 @@ impl PyDataFrame {
     /// Write a `DataFrame` to a CSV file.
     fn write_csv(
         &self,
+        py: Python,
         path: &str,
         with_header: bool,
         write_options: Option<PyDataFrameWriteOptions>,
-        py: Python,
     ) -> PyDataFusionResult<()> {
         let csv_options = CsvOptions {
             has_header: Some(with_header),
@@ -1078,15 +1078,22 @@ impl From<PyDataFrameWriteOptions> for DataFrameWriteOptions {
 #[pymethods]
 impl PyDataFrameWriteOptions {
     #[new]
-    fn new(insert_operation: PyInsertOp) -> Self {
+    fn new() -> Self {
         Self {
-            insert_operation: insert_operation.into(),
+            insert_operation: InsertOp::Append,
             single_file_output: false,
             partition_by: vec![],
             sort_by: vec![],
         }
     }
 
+    /// Return a copy of these options with `insert_operation` replaced; `self` is not modified.
+    pub fn with_insert_operation(&self, insert_operation: PyInsertOp) -> Self {
+        let mut result = self.clone();
+        result.insert_operation = insert_operation.into();
+        result
+    }
+
     pub fn with_single_file_output(&self, single_file_output: bool) -> Self {
         let mut result = self.clone();