|
18 | 18 | import geoarrow.types as gat |
19 | 19 | import geopandas.testing |
20 | 20 | import pandas as pd |
| 21 | +from pathlib import Path |
21 | 22 | import pyarrow as pa |
22 | 23 | import pytest |
23 | 24 | import sedonadb |
| 25 | +import tempfile |
24 | 26 |
|
25 | 27 |
|
26 | 28 | def test_dataframe_from_dataframe(con): |
@@ -281,6 +283,55 @@ def test_dataframe_to_pandas(con): |
281 | 283 | ) |
282 | 284 |
|
283 | 285 |
|
| 286 | +def test_dataframe_to_parquet(con): |
| 287 | + df = con.sql( |
| 288 | + "SELECT * FROM (VALUES ('one', 1), ('two', 2), ('three', 3)) AS t(a, b)" |
| 289 | + ) |
| 290 | + |
| 291 | + with tempfile.TemporaryDirectory() as td: |
| 292 | + # Defaults with a path that ends with .parquet: expect a single file |
| 293 | + tmp_parquet_file = Path(td) / "tmp.parquet" |
| 294 | + df.to_parquet(tmp_parquet_file) |
| 295 | + |
| 296 | + assert tmp_parquet_file.exists() |
| 297 | + assert tmp_parquet_file.is_file() |
| 298 | + pd.testing.assert_frame_equal( |
| 299 | + pd.read_parquet(tmp_parquet_file), |
| 300 | + pd.DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]}), |
| 301 | + ) |
| 302 | + |
| 303 | + # Defaults with a path that doesn't end in .parquet: expect a directory |
| 304 | + tmp_parquet_dir = Path(td) / "tmp" |
| 305 | + df.to_parquet(tmp_parquet_dir) |
| 306 | + |
| 307 | + assert tmp_parquet_dir.exists() |
| 308 | + assert tmp_parquet_dir.is_dir() |
| 309 | + pd.testing.assert_frame_equal( |
| 310 | + pd.read_parquet(tmp_parquet_dir), |
| 311 | + pd.DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]}), |
| 312 | + ) |
| 313 | + |
| 314 | + # With partition_by=["a"]: compared after sorting by b; a is expected last, as a Categorical |
| 315 | + tmp_parquet_dir = Path(td) / "tmp_partitioned" |
| 316 | + df.to_parquet(tmp_parquet_dir, partition_by=["a"]) |
| 317 | + assert tmp_parquet_dir.exists() |
| 318 | + assert tmp_parquet_dir.is_dir() |
| 319 | + pd.testing.assert_frame_equal( |
| 320 | + pd.read_parquet(tmp_parquet_dir).sort_values("b").reset_index(drop=True), |
| 321 | + pd.DataFrame( |
| 322 | + {"b": [1, 2, 3], "a": pd.Categorical(["one", "two", "three"])} |
| 323 | + ), |
| 324 | + ) |
| 325 | + |
| 326 | + # With sort_by=["a"]: rows are expected back ordered by a |
| 327 | + tmp_parquet = Path(td) / "tmp_ordered.parquet" |
| 328 | + df.to_parquet(tmp_parquet, sort_by=["a"]) |
| 329 | + pd.testing.assert_frame_equal( |
| 330 | + pd.read_parquet(tmp_parquet), |
| 331 | + pd.DataFrame({"a": ["one", "three", "two"], "b": [1, 3, 2]}), |
| 332 | + ) |
| 333 | + |
| 334 | + |
284 | 335 | def test_show(con, capsys): |
285 | 336 | con.sql("SELECT 1 as one").show() |
286 | 337 | expected = """ |
|
0 commit comments