Skip to content

Commit 90864de

Browse files
authored
Add NarwhalsFilePublisher (#93)
* Add NarwhalsFilePublisher.
* Improve consistency with rest of the code. Use narwhals.stable.v1 and use exttypes.narwhals.
* Require eager frame in NarwhalsFilePublisher.

Signed-off-by: Hin Tse <5867507+hintse@users.noreply.github.com>
1 parent 4fab775 commit 90864de

File tree

3 files changed

+61
-0
lines changed

3 files changed

+61
-0
lines changed

ccflow/publishers/file.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import pickle
22
from typing import IO, Any, Callable, Dict, Generic
33

4+
import narwhals.stable.v1 as nw
45
import pandas as pd
56
import yaml
67
from cloudpathlib import AnyPath
78
from pydantic import Field, field_validator
89
from typing_extensions import Literal, override
910

1011
from ..exttypes import JinjaTemplate
12+
from ..exttypes.narwhals import DataFrameT
1113
from ..publisher import BasePublisher
1214
from ..serialization import orjson_dumps
1315
from ..utils import PydanticDictOptions, PydanticModelType, dict_to_model
@@ -16,6 +18,7 @@
1618
"DictTemplateFilePublisher",
1719
"GenericFilePublisher",
1820
"JSONPublisher",
21+
"NarwhalsFilePublisher",
1922
"PandasFilePublisher",
2023
"PicklePublisher",
2124
"PydanticJSONPublisher",
@@ -190,3 +193,25 @@ def __call__(self) -> AnyPath:
190193
mode=self.mode,
191194
kwargs=self.kwargs,
192195
)()
196+
197+
198+
class NarwhalsFilePublisher(BasePublisher):
    """Publish a narwhals data frame to a file using an appropriate method on nw.DataFrame."""

    # Frame to publish; defaults to None so it can be assigned after construction.
    data: DataFrameT = None
    # Extra keyword arguments, handed unchanged to GenericFilePublisher.
    kwargs: Dict[str, Any] = Field(default_factory=dict)
    # Name of the nw.DataFrame method that performs the write. The chosen method
    # must be able to write to a buffer or file-like object.
    func: str = "write_csv"
    # Suffix passed through to GenericFilePublisher.
    suffix: str = ".csv"
    # Mode passed through to GenericFilePublisher: "w" or "wb".
    mode: Literal["w", "wb"] = "w"

    @override
    def __call__(self) -> AnyPath:
        # Look the writer up by name on the nw.DataFrame class itself, so the
        # delegate receives the plain function rather than a bound method.
        writer = getattr(nw.DataFrame, self.func)
        # All naming and file handling is delegated to GenericFilePublisher;
        # this class only selects the dump callable and forwards its fields.
        delegate = GenericFilePublisher(
            name=self.name,
            name_params=self.name_params,
            data=self.data,
            dump=writer,
            suffix=self.suffix,
            mode=self.mode,
            kwargs=self.kwargs,
        )
        return delegate()

ccflow/tests/publishers/test_file.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pathlib import Path
66
from unittest import TestCase
77

8+
import narwhals.stable.v1 as nw
89
import pandas as pd
910
from pydantic import BaseModel as PydanticBaseModel
1011

@@ -13,6 +14,7 @@
1314
DictTemplateFilePublisher,
1415
GenericFilePublisher,
1516
JSONPublisher,
17+
NarwhalsFilePublisher,
1618
PandasFilePublisher,
1719
PicklePublisher,
1820
YAMLPublisher,
@@ -162,3 +164,36 @@ def test_pandas_feather(self):
162164
self.assertEqual(path, Path("test_pandas.f"))
163165
df = pd.read_feather(path)
164166
pd.testing.assert_frame_equal(df, p.data)
167+
168+
def test_narwhals_csv(self):
    """Publish a pandas-backed narwhals frame with write_csv and read it back."""
    expected = pd.DataFrame({"a": [1, 2, 3], "b": ["foo", "bar", "baz"]})
    with tempfile.TemporaryDirectory() as workdir:
        # The publisher is given a relative name, so run from inside the temp dir.
        os.chdir(workdir)
        publisher = NarwhalsFilePublisher(
            name="test_{{param}}",
            name_params={"param": "narwhals"},
            func="write_csv",
            suffix=".csv",
        )
        # The frame is attached after construction, wrapped as a narwhals frame.
        publisher.data = nw.from_native(expected)
        written = publisher()
        self.assertEqual(written, Path("test_narwhals.csv"))
        # Round trip: what pandas reads back must equal the original frame.
        roundtrip = pd.read_csv(written)
        pd.testing.assert_frame_equal(expected, roundtrip)
183+
184+
def test_narwhals_parquet(self):
    """Publish a pandas-backed narwhals frame with write_parquet and read it back."""
    expected = pd.DataFrame({"a": [1, 2, 3], "b": ["foo", "bar", "baz"]})
    with tempfile.TemporaryDirectory() as workdir:
        # The publisher is given a relative name, so run from inside the temp dir.
        os.chdir(workdir)
        publisher = NarwhalsFilePublisher(
            name="test_{{param}}",
            name_params={"param": "narwhals"},
            func="write_parquet",
            suffix=".parquet",
            mode="wb",  # this test requests binary mode for the parquet output
        )
        # The frame is attached after construction, wrapped as a narwhals frame.
        publisher.data = nw.from_native(expected)
        written = publisher()
        self.assertEqual(written, Path("test_narwhals.parquet"))
        # Round trip: what pandas reads back must equal the original frame.
        roundtrip = pd.read_parquet(written)
        pd.testing.assert_frame_equal(expected, roundtrip)

docs/wiki/Key-Features.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ The following table summarizes the "publisher" models.
5858
| `GenericFilePublisher` | `ccflow.publishers` | Publish data using a generic "dump" Callable. Uses `smart_open` under the hood so that local and cloud paths are supported. |
5959
| `JSONPublisher` | `ccflow.publishers` | Publish data to file in JSON format. |
6060
| `PandasFilePublisher` | `ccflow.publishers` | Publish a pandas data frame to a file using an appropriate method on pd.DataFrame. For large-scale exporting (using parquet), see `PandasParquetPublisher`. |
61+
| `NarwhalsFilePublisher` | `ccflow.publishers` | Publish a narwhals data frame to a file using an appropriate method on nw.DataFrame. |
6162
| `PicklePublisher` | `ccflow.publishers` | Publish data to a pickle file. |
6263
| `PydanticJSONPublisher` | `ccflow.publishers` | Publish a pydantic model to a json file. See [Pydantic modeljson](https://docs.pydantic.dev/latest/concepts/serialization/#modelmodel_dump) |
6364
| `YAMLPublisher` | `ccflow.publishers` | Publish data to file in YAML format. |

0 commit comments

Comments
 (0)