Skip to content

Commit 4d0f8be

Browse files
committed
Support input/output to/from polars via python (not native rust)
Signed-off-by: Tim Paine <[email protected]>
1 parent 126a6a1 commit 4d0f8be

File tree

6 files changed

+397
-1
lines changed

6 files changed

+397
-1
lines changed
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2+
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
3+
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
4+
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
5+
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
6+
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7+
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
8+
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9+
# ┃ This file is part of the Perspective library, distributed under the terms ┃
10+
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11+
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12+
13+
from datetime import date, datetime
14+
import numpy as np
15+
import polars as pl
16+
from pytest import mark
17+
import perspective as psp
18+
19+
client = psp.Server().new_local_client()
20+
Table = client.table
21+
22+
23+
def arrow_bytes_to_polars(view):
    """Read a view's Arrow output back into a polars DataFrame.

    Args:
        view: an object whose ``to_arrow()`` returns the bytes of an Arrow
            IPC stream.

    Returns:
        The stream's contents as a polars DataFrame.
    """
    import pyarrow

    with pyarrow.ipc.open_stream(pyarrow.BufferReader(view.to_arrow())) as reader:
        # Convert Arrow -> polars directly. Going through pandas first (as
        # this helper used to) can change dtypes on the way, e.g. integer
        # columns containing nulls come back from pandas as floats.
        return pl.from_arrow(reader.read_all())
28+
29+
30+
class TestTablePolars:
    """Tests for creating and updating Perspective tables from polars DataFrames.

    Every test goes through the module-level ``Table`` factory
    (``client.table``).
    """

    def test_empty_table(self):
        """An empty list yields a table with no rows and an empty schema."""
        tbl = Table([])
        assert tbl.size() == 0
        assert tbl.schema() == {}

    def test_table_dataframe(self):
        """A DataFrame of ints loads as integer columns with the same records."""
        d = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        data = pl.DataFrame(d)
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.schema() == {"a": "integer", "b": "integer"}
        assert tbl.view().to_records() == [
            {"a": 1, "b": 2},
            {"a": 3, "b": 4},
        ]

    def test_table_dataframe_column_order(self):
        """The table keeps the column order of the (reordered) DataFrame."""
        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
        data = pl.DataFrame(d).select(["b", "c", "a", "d"])
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.columns() == ["b", "c", "a", "d"]

    def test_table_dataframe_selective_column_order(self):
        """Column order is also kept when only a subset of columns is selected."""
        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
        data = pl.DataFrame(d).select(["b", "c", "a"])
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.columns() == ["b", "c", "a"]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_dataframe_does_not_mutate(self):
        # make sure we don't mutate the dataframe that a user passes in
        data = pl.DataFrame(
            {
                "a": np.array([None, 1, None, 2], dtype=object),
                "b": np.array([1.5, None, 2.5, None], dtype=object),
            }
        )
        assert data["a"].to_list() == [None, 1, None, 2]
        assert data["b"].to_list() == [1.5, None, 2.5, None]

        tbl = Table(data)
        assert tbl.size() == 4
        assert tbl.schema() == {"a": "integer", "b": "float"}

        # the caller's DataFrame must be unchanged after table construction
        assert data["a"].to_list() == [None, 1, None, 2]
        assert data["b"].to_list() == [1.5, None, 2.5, None]

    def test_table_polars_from_schema_int(self):
        """Updating an integer column from polars preserves values and nulls."""
        data = [None, 1, None, 2, None, 3, 4]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "integer"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_bool(self):
        """Updating a boolean column from polars preserves the values."""
        data = [True, False, True, False]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "boolean"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_float(self):
        """Updating a float column from polars preserves values and nulls."""
        data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_float_all_nan(self):
        """A column of only NaN is read back as all None."""
        data = [np.nan, np.nan, np.nan, np.nan]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == [None, None, None, None]

    def test_table_polars_from_schema_float_to_int(self):
        """Float input written into an integer column loses its fraction."""
        data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "integer"})
        table.update(df)
        # truncates decimal
        assert table.view().to_columns()["a"] == [None, 1, None, 2, None, 3, 4]

    def test_table_polars_from_schema_int_to_float(self):
        """Integer input written into a float column is read back as floats."""
        data = [None, 1, None, 2, None, 3, 4]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

    def test_table_polars_from_schema_date(self, util):
        """Dates come back as the ``util.to_timestamp`` value of that day's midnight."""
        data = [date(2019, 8, 15), None, date(2019, 8, 16)]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "date"})
        table.update(df)
        assert table.view().to_columns()["a"] == [
            util.to_timestamp(datetime(2019, 8, 15)),
            None,
            util.to_timestamp(datetime(2019, 8, 16)),
        ]

    def test_table_polars_from_schema_str(self):
        """Updating a string column from polars preserves values and nulls."""
        data = ["a", None, "b", None, "c"]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "string"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_none(self):
        """A column holding only None loads and reads back as None."""
        data = [None, None, None]
        df = pl.DataFrame({"a": data})
        table = Table(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_symmetric_table(self):
        # make sure that updates are symmetric to table creation
        df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
        t1 = Table(df)
        t2 = Table({"a": "integer", "b": "float"})
        t2.update(df)
        expected = {
            "a": [1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5],
        }
        assert t1.view().to_columns() == expected
        # t2 used to be built and updated but never checked, so the test
        # could not actually detect an asymmetry between the two paths.
        assert t2.view().to_columns() == expected

    def test_table_polars_symmetric_stacked_updates(self):
        # make sure that updates are symmetric to table creation
        df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})

        t1 = Table(df)
        t1.update(df)

        t2 = Table({"a": "integer", "b": "float"})
        t2.update(df)
        t2.update(df)

        expected = {
            "a": [1, 2, 3, 4, 1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
        }
        assert t1.view().to_columns() == expected
        # check the schema-first table too; it was previously left unasserted
        assert t2.view().to_columns() == expected

    @mark.skip(reason="Not supported, polars doesnt like input")
    def test_table_polars_transitive(self):
        # serialized output -> table -> serialized output
        records = {
            "a": [1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5],
            "c": [np.nan, np.nan, "abc", np.nan],
            "d": [None, True, None, False],
            "e": [
                float("nan"),
                datetime(2019, 7, 11, 12, 30),
                float("nan"),
                datetime(2019, 7, 11, 12, 30),
            ],
        }

        df = pl.DataFrame(records, strict=False)
        t1 = Table(df)
        out1 = arrow_bytes_to_polars(t1.view(columns=["a", "b", "c", "d", "e"]))
        t2 = Table(out1)
        assert t1.schema() == t2.schema()
        out2 = t2.view().to_columns()
        assert t1.view().to_columns() == out2

    # dtype=object should have correct inferred types

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_int(self):
        df = pl.DataFrame({"a": np.array([1, 2, None, 2, None, 3, 4], dtype=object)})
        table = Table(df)
        assert table.schema() == {"a": "integer"}
        assert table.view().to_columns()["a"] == [1, 2, None, 2, None, 3, 4]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_float(self):
        df = pl.DataFrame({"a": np.array([None, 1, None, 2, None, 3, 4], dtype=object)})
        table = Table(df)
        # The inputs are Python ints, so the expected schema is "integer";
        # the float literals below still match because 1 == 1.0 in Python.
        assert table.schema() == {"a": "integer"}
        assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_bool(self):
        df = pl.DataFrame(
            {"a": np.array([True, False, True, False, True, False], dtype=object)}
        )
        table = Table(df)
        assert table.schema() == {"a": "boolean"}
        assert table.view().to_columns()["a"] == [True, False, True, False, True, False]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_datetime(self):
        df = pl.DataFrame(
            {
                "a": np.array(
                    [
                        datetime(2019, 7, 11, 1, 2, 3),
                        datetime(2019, 7, 12, 1, 2, 3),
                        None,
                    ],
                    dtype=object,
                )
            }
        )
        table = Table(df)
        assert table.schema() == {"a": "datetime"}
        assert table.view().to_columns()["a"] == [
            datetime(2019, 7, 11, 1, 2, 3),
            datetime(2019, 7, 12, 1, 2, 3),
            None,
        ]

    def test_table_polars_object_to_str(self):
        """An object-dtype array of strings loads as a string column with nulls kept."""
        df = pl.DataFrame({"a": np.array(["abc", "def", None, "ghi"], dtype=object)})
        table = Table(df)
        assert table.schema() == {"a": "string"}
        assert table.view().to_columns()["a"] == ["abc", "def", None, "ghi"]

rust/perspective-python/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ maturin==1.6.0
88
numpy==2.0.0
99
packaging==24.1
1010
pandas==2.2.2
11+
polars==1.13.1
1112
pyarrow==16.1.0
1213
psutil==6.0.0
1314
pytest==8.2.2

rust/perspective-python/src/client/client_sync.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,11 @@ impl View {
351351
self.0.to_dataframe(window).py_block_on(py)
352352
}
353353

354+
#[pyo3(signature = (**window))]
355+
pub fn to_polars(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyAny>> {
356+
self.0.to_polars(window).py_block_on(py)
357+
}
358+
354359
#[doc = crate::inherit_docs!("view/to_arrow.md")]
355360
#[pyo3(signature = (**window))]
356361
pub fn to_arrow(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyBytes>> {

rust/perspective-python/src/client/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
pub mod client_sync;
1414
mod pandas;
15+
mod polars;
1516
mod pyarrow;
1617
pub mod python;
1718
pub mod table_data;

0 commit comments

Comments
 (0)