Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/perspective-python/docs/client/to_pandas.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Serialize the data to a `pandas.DataFrame`.
1 change: 1 addition & 0 deletions rust/perspective-python/docs/client/to_polars.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Serialize the data to a `polars.DataFrame`.
1 change: 1 addition & 0 deletions rust/perspective-python/docs/lib.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ guide. In Python, however, Perspective supports additional data types that are
commonly used when processing data:

- `pandas.DataFrame`
- `polars.DataFrame`
- `bytes` (encoded as Apache Arrow)
- `objects` (either extracting a repr or via reference)
- `str` (encoding as a CSV)
Expand Down
251 changes: 251 additions & 0 deletions rust/perspective-python/perspective/tests/table/test_table_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
# ┃ This file is part of the Perspective library, distributed under the terms ┃
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

from datetime import date, datetime
import numpy as np
import polars as pl
from pytest import mark
import perspective as psp

# Module-level fixtures shared by every test below: one local client created
# from a fresh `psp.Server()`, and `Table` as a shorthand for `client.table`.
client = psp.Server().new_local_client()
Table = client.table


def arrow_bytes_to_polars(view):
    """Convert a view's Arrow output into a ``polars.DataFrame``.

    The bytes returned by ``view.to_arrow()`` are read as an Arrow IPC stream
    and handed to polars directly as an Arrow table. The previous version
    detoured through ``reader.read_pandas()`` + ``pl.from_dataframe``, which
    needlessly required pandas and an extra conversion step.

    Args:
        view: an object exposing ``to_arrow()`` that returns Arrow IPC
            stream bytes.

    Returns:
        The ``polars.DataFrame`` built from the decoded Arrow table.
    """
    # Imported lazily so pyarrow is only required when this helper is used.
    import pyarrow

    with pyarrow.ipc.open_stream(pyarrow.BufferReader(view.to_arrow())) as reader:
        return pl.from_arrow(reader.read_all())


class TestTablePolars(object):
def test_empty_table(self):
    """A table built from an empty list has zero rows and an empty schema."""
    empty = Table([])
    assert empty.size() == 0
    assert empty.schema() == {}

def test_table_dataframe(self):
    """A table can be created directly from a ``polars.DataFrame``."""
    rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
    tbl = Table(pl.DataFrame(rows))

    assert tbl.size() == 2
    assert tbl.schema() == {"a": "integer", "b": "integer"}

    records = tbl.view().to_records()
    assert records == [{"a": 1, "b": 2}, {"a": 3, "b": 4}]

def test_table_lazyframe(self):
    """A table can be created from the lazy form of a polars frame."""
    rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
    lazy_frame = pl.DataFrame(rows).lazy()
    tbl = Table(lazy_frame)

    assert tbl.size() == 2
    assert tbl.schema() == {"a": "integer", "b": "integer"}

    records = tbl.view().to_records()
    assert records == [{"a": 1, "b": 2}, {"a": 3, "b": 4}]

def test_table_dataframe_column_order(self):
    """The table keeps the column order of the frame it was built from."""
    rows = [
        {"a": 1, "b": 2, "c": 3, "d": 4},
        {"a": 3, "b": 4, "c": 5, "d": 6},
    ]
    # Reorder all four columns before handing the frame to the table.
    reordered = pl.DataFrame(rows).select(["b", "c", "a", "d"])
    tbl = Table(reordered)

    assert tbl.size() == 2
    assert tbl.columns() == ["b", "c", "a", "d"]

def test_table_dataframe_selective_column_order(self):
    """Only the selected columns reach the table, in the selected order."""
    rows = [
        {"a": 1, "b": 2, "c": 3, "d": 4},
        {"a": 3, "b": 4, "c": 5, "d": 6},
    ]
    # Column "d" is dropped by the select; the rest are reordered.
    subset = pl.DataFrame(rows).select(["b", "c", "a"])
    tbl = Table(subset)

    assert tbl.size() == 2
    assert tbl.columns() == ["b", "c", "a"]

def test_table_dataframe_does_not_mutate(self):
    """Creating a table must leave the caller's DataFrame untouched."""
    frame = pl.DataFrame(
        {
            "a": [None, 1, None, 2],
            "b": [1.5, None, 2.5, None],
        }
    )

    def assert_frame_intact():
        # Compare against fresh literals so the check cannot be fooled by
        # shared references.
        assert frame["a"].to_list() == [None, 1, None, 2]
        assert frame["b"].to_list() == [1.5, None, 2.5, None]

    # Sanity check before the table sees the frame ...
    assert_frame_intact()

    tbl = Table(frame)
    assert tbl.size() == 4
    assert tbl.schema() == {"a": "integer", "b": "float"}

    # ... and the same contents afterwards.
    assert_frame_intact()

def test_table_polars_from_schema_int(self):
    """Integers with nulls round-trip through an "integer" schema column."""
    values = [None, 1, None, 2, None, 3, 4]
    frame = pl.DataFrame({"a": values})

    table = Table({"a": "integer"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == values

def test_table_polars_from_schema_bool(self):
    """Booleans round-trip through a "boolean" schema column."""
    flags = [True, False, True, False]
    frame = pl.DataFrame({"a": flags})

    table = Table({"a": "boolean"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == flags

def test_table_polars_from_schema_float(self):
    """Floats with nulls round-trip through a "float" schema column."""
    values = [None, 1.5, None, 2.5, None, 3.5, 4.5]
    frame = pl.DataFrame({"a": values})

    table = Table({"a": "float"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == values

def test_table_polars_from_schema_float_all_nan(self):
    """An all-NaN float column is expected to come back as all nulls."""
    all_nan = [np.nan, np.nan, np.nan, np.nan]
    frame = pl.DataFrame({"a": all_nan})

    table = Table({"a": "float"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == [None, None, None, None]

def test_table_polars_from_schema_float_to_int(self):
    """Floats loaded into an "integer" column are expected to be truncated."""
    floats = [None, 1.5, None, 2.5, None, 3.5, 4.5]
    frame = pl.DataFrame({"a": floats})

    table = Table({"a": "integer"})
    table.update(frame)

    # truncates decimal
    columns = table.view().to_columns()
    assert columns["a"] == [None, 1, None, 2, None, 3, 4]

def test_table_polars_from_schema_int_to_float(self):
    """Integers loaded into a "float" column come back as floats."""
    ints = [None, 1, None, 2, None, 3, 4]
    frame = pl.DataFrame({"a": ints})

    table = Table({"a": "float"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

def test_table_polars_from_schema_date(self, util):
    """Dates with a null load into a "date" column and serialize as timestamps.

    `util` is a fixture supplied elsewhere; only its `to_timestamp` helper
    is used here.
    """
    days = [date(2019, 8, 15), None, date(2019, 8, 16)]
    frame = pl.DataFrame({"a": days})

    table = Table({"a": "date"})
    table.update(frame)

    actual = table.view().to_columns()["a"]
    expected = [
        util.to_timestamp(datetime(2019, 8, 15)),
        None,
        util.to_timestamp(datetime(2019, 8, 16)),
    ]
    assert actual == expected

def test_table_polars_from_schema_str(self):
    """Strings with nulls round-trip through a "string" schema column."""
    words = ["a", None, "b", None, "c"]
    frame = pl.DataFrame({"a": words})

    table = Table({"a": "string"})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns["a"] == words

def test_table_polars_none(self):
    """A column made only of nulls survives table creation unchanged."""
    nulls = [None, None, None]
    table = Table(pl.DataFrame({"a": nulls}))

    columns = table.view().to_columns()
    assert columns["a"] == nulls

def test_table_polars_symmetric_table(self):
    """Creating a table from a frame must match schema-create + update.

    `t1` is built directly from the DataFrame; `t2` is built from an explicit
    schema and then updated with the same DataFrame. Both must serialize to
    the same columns.
    """
    # make sure that updates are symmetric to table creation
    df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
    expected = {
        "a": [1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5],
    }

    t1 = Table(df)
    t2 = Table({"a": "integer", "b": "float"})
    t2.update(df)

    assert t1.view().to_columns() == expected
    # Previously `t2` was populated but never checked, so the symmetry this
    # test is named for was not actually verified.
    assert t2.view().to_columns() == expected

def test_table_polars_symmetric_stacked_updates(self):
    """Create + update must match schema-create + two updates.

    `t1` is created from the DataFrame and updated once with it; `t2` starts
    from an explicit schema and is updated twice. Both end up with the frame's
    rows stacked twice and must serialize identically.
    """
    # make sure that updates are symmetric to table creation
    df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
    expected = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
    }

    t1 = Table(df)
    t1.update(df)

    t2 = Table({"a": "integer", "b": "float"})
    t2.update(df)
    t2.update(df)

    assert t1.view().to_columns() == expected
    # Previously `t2` was populated but never checked, leaving the symmetric
    # half of this test unverified.
    assert t2.view().to_columns() == expected

@mark.skip(reason="Not supported, polars doesnt like input")
def test_table_polars_transitive(self):
    """Round trip: serialized output -> table -> serialized output.

    Skipped for now (see the skip reason). When enabled, a table built from
    mixed-type records is exported via Arrow, re-imported as a second table,
    and both tables must agree on schema and column contents.
    """
    stamp = datetime(2019, 7, 11, 12, 30)
    records = {
        "a": [1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5],
        "c": [np.nan, np.nan, "abc", np.nan],
        "d": [None, True, None, False],
        "e": [float("nan"), stamp, float("nan"), stamp],
    }

    source = Table(pl.DataFrame(records, strict=False))
    ordered_view = source.view(columns=["a", "b", "c", "d", "e"])

    # Feed the first table's Arrow output back in as a second table.
    roundtrip = Table(arrow_bytes_to_polars(ordered_view))
    assert source.schema() == roundtrip.schema()

    out2 = roundtrip.view().to_columns()
    assert source.view().to_columns() == out2

# type inference: columns containing nulls (or built from object arrays) should get the correct inferred type

def test_table_polars_object_to_int(self):
    """An integer column containing nulls is inferred as "integer"."""
    values = [1, 2, None, 2, None, 3, 4]
    table = Table(pl.DataFrame({"a": values}))

    assert table.schema() == {"a": "integer"}

    columns = table.view().to_columns()
    assert columns["a"] == [1, 2, None, 2, None, 3, 4]

def test_table_polars_object_to_float(self):
    """A float column containing nulls is inferred as "float".

    The earlier version of this test fed integers and asserted an "integer"
    schema, which duplicated the object-to-int test; its float expectations
    only passed because ``1 == 1.0`` in Python. The input is now genuinely
    floating point so the float inference path is exercised.
    """
    df = pl.DataFrame({"a": [None, 1.0, None, 2.0, None, 3.0, 4.0]})
    table = Table(df)
    assert table.schema() == {"a": "float"}
    assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

def test_table_polars_object_to_bool(self):
    """A boolean column is inferred as "boolean" and round-trips intact."""
    flags = [True, False, True, False, True, False]
    table = Table(pl.DataFrame({"a": flags}))

    assert table.schema() == {"a": "boolean"}

    columns = table.view().to_columns()
    assert columns["a"] == [True, False, True, False, True, False]

def test_table_polars_object_to_datetime(self):
    """A datetime column with a null is inferred as "datetime".

    Output values are compared against each datetime's ``timestamp()``
    multiplied by 1000.
    """
    first = datetime(2019, 7, 11, 1, 2, 3)
    second = datetime(2019, 7, 12, 1, 2, 3)
    frame = pl.DataFrame({"a": [first, second, None]})

    table = Table(frame)
    assert table.schema() == {"a": "datetime"}

    actual = table.view().to_columns()["a"]
    expected = [first.timestamp() * 1000, second.timestamp() * 1000, None]
    assert actual == expected

def test_table_polars_object_to_str(self):
    """An object-dtype numpy array of strings is inferred as "string"."""
    raw = np.array(["abc", "def", None, "ghi"], dtype=object)
    frame = pl.DataFrame({"a": raw})
    table = Table(frame)

    assert table.schema() == {"a": "string"}

    columns = table.view().to_columns()
    assert columns["a"] == ["abc", "def", None, "ghi"]
1 change: 1 addition & 0 deletions rust/perspective-python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ maturin==1.6.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
polars==1.13.1
pyarrow==16.1.0
psutil==6.0.0
pytest==8.2.2
Expand Down
14 changes: 14 additions & 0 deletions rust/perspective-python/src/client/client_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,11 +346,25 @@ impl View {
self.0.to_csv(window).py_block_on(py)
}

// Older name for the pandas export: it reuses the `to_pandas.md` documentation
// and forwards to the same inner `to_dataframe` call that `to_pandas` uses.
// Plain `//` comments are used here on purpose so the Python-visible docstring
// (supplied by the `#[doc = ...]` attribute) is not altered.
#[doc = include_str!("../../docs/client/to_pandas.md")]
// `**window` collects all Python keyword arguments into the optional dict.
#[pyo3(signature = (**window))]
// NOTE(review): the deprecation below is currently disabled; confirm whether
// it should be enabled now that `to_pandas` exists.
// #[deprecated(since="3.2.0", note="Please use `View::to_pandas`")]
pub fn to_dataframe(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyAny>> {
// Delegate to the wrapped view; `py_block_on` presumably drives the returned
// future to completion on the calling thread (TODO confirm).
self.0.to_dataframe(window).py_block_on(py)
}

// Pandas export under its explicit name. The wrapped view exposes this as
// `to_dataframe`, which is why the inner call uses that name.
#[doc = include_str!("../../docs/client/to_pandas.md")]
// `**window` collects all Python keyword arguments into the optional dict.
#[pyo3(signature = (**window))]
pub fn to_pandas(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyAny>> {
// Delegate to the wrapped view; `py_block_on` presumably drives the returned
// future to completion on the calling thread (TODO confirm).
self.0.to_dataframe(window).py_block_on(py)
}

// Polars export: forwards the optional window kwargs to the wrapped view's
// `to_polars` and returns whatever Python object it produces.
#[doc = include_str!("../../docs/client/to_polars.md")]
// `**window` collects all Python keyword arguments into the optional dict.
#[pyo3(signature = (**window))]
pub fn to_polars(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyAny>> {
// Delegate to the wrapped view; `py_block_on` presumably drives the returned
// future to completion on the calling thread (TODO confirm).
self.0.to_polars(window).py_block_on(py)
}

#[doc = crate::inherit_docs!("view/to_arrow.md")]
#[pyo3(signature = (**window))]
pub fn to_arrow(&self, py: Python<'_>, window: Option<Py<PyDict>>) -> PyResult<Py<PyBytes>> {
Expand Down
1 change: 1 addition & 0 deletions rust/perspective-python/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

pub mod client_sync;
mod pandas;
mod polars;
mod pyarrow;
pub mod python;
pub mod table_data;
Expand Down
Loading
Loading