|
| 1 | +# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ |
| 2 | +# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ |
| 3 | +# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ |
| 4 | +# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ |
| 5 | +# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ |
| 6 | +# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ |
| 7 | +# ┃ Copyright (c) 2017, the Perspective Authors. ┃ |
| 8 | +# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ |
| 9 | +# ┃ This file is part of the Perspective library, distributed under the terms ┃ |
| 10 | +# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ |
| 11 | +# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ |
| 12 | + |
| 13 | +from datetime import date, datetime |
| 14 | +import numpy as np |
| 15 | +import polars as pl |
| 16 | +from pytest import mark |
| 17 | +import perspective as psp |
| 18 | + |
# One Perspective client, attached to a fresh in-process server, is shared by
# every test in this module.
client = psp.Server().new_local_client()

# Short alias so the tests can construct tables as ``Table(data)``.
Table = client.table
| 21 | + |
| 22 | + |
def arrow_bytes_to_polars(view):
    """Decode ``view.to_arrow()`` into a polars DataFrame.

    The bytes returned by ``view.to_arrow()`` are read as an Arrow IPC stream
    and handed to polars as an Arrow table. Staying in Arrow end to end avoids
    a detour through pandas, which would need pandas installed and could
    change column dtypes and null representation before polars sees the data.

    Args:
        view: object exposing ``to_arrow()`` that returns Arrow IPC stream
            bytes (here, a Perspective view).

    Returns:
        A ``polars.DataFrame`` built from every record batch in the stream.
    """
    # Imported lazily, as in the original helper, so pyarrow is only needed
    # by tests that actually call this function.
    import pyarrow

    with pyarrow.ipc.open_stream(pyarrow.BufferReader(view.to_arrow())) as reader:
        # read_all() collects all batches into a single pyarrow.Table.
        return pl.from_arrow(reader.read_all())
| 28 | + |
| 29 | + |
class TestTablePolars:
    """Tests for creating and updating Perspective tables from polars data.

    Each test builds a ``polars.DataFrame``, loads it through the module-level
    ``Table`` factory (directly, or via ``update`` on a schema-declared table)
    and checks the resulting size, schema, column order or serialized values.
    Tests marked ``skip`` document inputs that are not currently supported.
    """

    def test_empty_table(self):
        """An empty list yields a table with no rows and an empty schema."""
        tbl = Table([])
        assert tbl.size() == 0
        assert tbl.schema() == {}

    def test_table_dataframe(self):
        """A DataFrame of ints loads as integer columns and round-trips."""
        d = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        data = pl.DataFrame(d)
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.schema() == {"a": "integer", "b": "integer"}
        assert tbl.view().to_records() == [
            {"a": 1, "b": 2},
            {"a": 3, "b": 4},
        ]

    def test_table_dataframe_column_order(self):
        """The table keeps the column order of the reordered DataFrame."""
        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
        data = pl.DataFrame(d).select(["b", "c", "a", "d"])
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.columns() == ["b", "c", "a", "d"]

    def test_table_dataframe_selective_column_order(self):
        """Column order is kept when only a subset of columns is selected."""
        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
        data = pl.DataFrame(d).select(["b", "c", "a"])
        tbl = Table(data)
        assert tbl.size() == 2
        assert tbl.columns() == ["b", "c", "a"]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_dataframe_does_not_mutate(self):
        """Loading a DataFrame must leave the caller's DataFrame unchanged."""
        # make sure we don't mutate the dataframe that a user passes in
        data = pl.DataFrame(
            {
                "a": np.array([None, 1, None, 2], dtype=object),
                "b": np.array([1.5, None, 2.5, None], dtype=object),
            }
        )
        assert data["a"].to_list() == [None, 1, None, 2]
        assert data["b"].to_list() == [1.5, None, 2.5, None]

        tbl = Table(data)
        assert tbl.size() == 4
        assert tbl.schema() == {"a": "integer", "b": "float"}

        # Same contents after the table has been built.
        assert data["a"].to_list() == [None, 1, None, 2]
        assert data["b"].to_list() == [1.5, None, 2.5, None]

    def test_table_polars_from_schema_int(self):
        """Ints with nulls update an ``integer`` column unchanged."""
        data = [None, 1, None, 2, None, 3, 4]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "integer"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_bool(self):
        """Booleans update a ``boolean`` column unchanged."""
        data = [True, False, True, False]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "boolean"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_float(self):
        """Floats with nulls update a ``float`` column unchanged."""
        data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_from_schema_float_all_nan(self):
        """An all-NaN float column is expected to serialize as all ``None``."""
        data = [np.nan, np.nan, np.nan, np.nan]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == [None, None, None, None]

    def test_table_polars_from_schema_float_to_int(self):
        """Floats pushed into an ``integer`` column lose their fraction."""
        data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "integer"})
        table.update(df)
        # truncates decimal
        assert table.view().to_columns()["a"] == [None, 1, None, 2, None, 3, 4]

    def test_table_polars_from_schema_int_to_float(self):
        """Ints pushed into a ``float`` column come back as floats."""
        data = [None, 1, None, 2, None, 3, 4]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "float"})
        table.update(df)
        assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

    def test_table_polars_from_schema_date(self, util):
        """Dates update a ``date`` column and serialize as timestamps.

        ``util`` is a pytest fixture provided outside this file; only its
        ``to_timestamp`` helper is used here.
        """
        data = [date(2019, 8, 15), None, date(2019, 8, 16)]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "date"})
        table.update(df)
        assert table.view().to_columns()["a"] == [
            util.to_timestamp(datetime(2019, 8, 15)),
            None,
            util.to_timestamp(datetime(2019, 8, 16)),
        ]

    def test_table_polars_from_schema_str(self):
        """Strings with nulls update a ``string`` column unchanged."""
        data = ["a", None, "b", None, "c"]
        df = pl.DataFrame({"a": data})
        table = Table({"a": "string"})
        table.update(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_none(self):
        """A column holding only nulls loads and round-trips as nulls."""
        data = [None, None, None]
        df = pl.DataFrame({"a": data})
        table = Table(df)
        assert table.view().to_columns()["a"] == data

    def test_table_polars_symmetric_table(self):
        """Creating from a DataFrame equals schema-create followed by update."""
        # make sure that updates are symmetric to table creation
        df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
        t1 = Table(df)
        t2 = Table({"a": "integer", "b": "float"})
        t2.update(df)

        expected = {
            "a": [1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5],
        }
        assert t1.view().to_columns() == expected
        # The schema-then-update table must hold the same data; without this
        # check the "symmetric" half of the test is never exercised.
        assert t2.view().to_columns() == expected

    def test_table_polars_symmetric_stacked_updates(self):
        """Repeated updates append identically on both construction paths."""
        # make sure that updates are symmetric to table creation
        df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})

        t1 = Table(df)
        t1.update(df)

        t2 = Table({"a": "integer", "b": "float"})
        t2.update(df)
        t2.update(df)

        expected = {
            "a": [1, 2, 3, 4, 1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
        }
        assert t1.view().to_columns() == expected
        # t2 received the same two batches via update only, so it must match.
        assert t2.view().to_columns() == expected

    @mark.skip(reason="Not supported, polars doesnt like input")
    def test_table_polars_transitive(self):
        """Serialized output fed back into a new table keeps schema and data."""
        # serialized output -> table -> serialized output
        records = {
            "a": [1, 2, 3, 4],
            "b": [1.5, 2.5, 3.5, 4.5],
            "c": [np.nan, np.nan, "abc", np.nan],
            "d": [None, True, None, False],
            "e": [
                float("nan"),
                datetime(2019, 7, 11, 12, 30),
                float("nan"),
                datetime(2019, 7, 11, 12, 30),
            ],
        }

        df = pl.DataFrame(records, strict=False)
        t1 = Table(df)
        out1 = arrow_bytes_to_polars(t1.view(columns=["a", "b", "c", "d", "e"]))
        t2 = Table(out1)
        assert t1.schema() == t2.schema()
        out2 = t2.view().to_columns()
        assert t1.view().to_columns() == out2

    # dtype=object should have correct inferred types

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_int(self):
        """Object-dtype ints with nulls should infer an ``integer`` column."""
        df = pl.DataFrame({"a": np.array([1, 2, None, 2, None, 3, 4], dtype=object)})
        table = Table(df)
        assert table.schema() == {"a": "integer"}
        assert table.view().to_columns()["a"] == [1, 2, None, 2, None, 3, 4]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_float(self):
        """Object-dtype numeric input compared against float-valued output."""
        # NOTE(review): the test name says "float" but the input values are
        # ints and the expected schema is "integer"; the value comparison only
        # holds because 1 == 1.0 in Python. Confirm the intent before
        # un-skipping this test.
        df = pl.DataFrame({"a": np.array([None, 1, None, 2, None, 3, 4], dtype=object)})
        table = Table(df)
        assert table.schema() == {"a": "integer"}
        assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_bool(self):
        """Object-dtype booleans should infer a ``boolean`` column."""
        df = pl.DataFrame(
            {"a": np.array([True, False, True, False, True, False], dtype=object)}
        )
        table = Table(df)
        assert table.schema() == {"a": "boolean"}
        assert table.view().to_columns()["a"] == [True, False, True, False, True, False]

    @mark.skip(reason="Not supported, polars converts to fixed_size_binary")
    def test_table_polars_object_to_datetime(self):
        """Object-dtype datetimes with a null should infer ``datetime``."""
        df = pl.DataFrame(
            {
                "a": np.array(
                    [
                        datetime(2019, 7, 11, 1, 2, 3),
                        datetime(2019, 7, 12, 1, 2, 3),
                        None,
                    ],
                    dtype=object,
                )
            }
        )
        table = Table(df)
        assert table.schema() == {"a": "datetime"}
        assert table.view().to_columns()["a"] == [
            datetime(2019, 7, 11, 1, 2, 3),
            datetime(2019, 7, 12, 1, 2, 3),
            None,
        ]

    def test_table_polars_object_to_str(self):
        """Object-dtype strings with a null load as a ``string`` column."""
        df = pl.DataFrame({"a": np.array(["abc", "def", None, "ghi"], dtype=object)})
        table = Table(df)
        assert table.schema() == {"a": "string"}
        assert table.view().to_columns()["a"] == ["abc", "def", None, "ghi"]
0 commit comments