|
19 | 19 | from datetime import datetime |
20 | 20 | from decimal import Decimal |
21 | 21 |
|
| 22 | +import pyarrow as pa |
22 | 23 | import pytest |
23 | 24 |
|
24 | 25 | try: |
|
31 | 32 |
|
32 | 33 | from elasticsearch import Elasticsearch |
33 | 34 | from elasticsearch.exceptions import SerializationError |
34 | | -from elasticsearch.serializer import JSONSerializer, OrjsonSerializer, TextSerializer |
| 35 | +from elasticsearch.serializer import ( |
| 36 | + JSONSerializer, |
| 37 | + OrjsonSerializer, |
| 38 | + PyArrowSerializer, |
| 39 | + TextSerializer, |
| 40 | +) |
35 | 41 |
|
36 | 42 | requires_numpy_and_pandas = pytest.mark.skipif( |
37 | 43 | np is None or pd is None, reason="Test requires numpy and pandas to be available" |
@@ -157,6 +163,25 @@ def test_serializes_pandas_category(json_serializer): |
157 | 163 | assert b'{"d":[1,2,3]}' == json_serializer.dumps({"d": cat}) |
158 | 164 |
|
159 | 165 |
|
def test_pyarrow_loads():
    """PyArrowSerializer.loads decodes an Arrow IPC stream back into a table.

    Builds a record batch with int, string, and boolean columns (including
    nulls), serializes it through the Arrow IPC stream writer, then checks
    that the serializer round-trips the exact column contents.
    """
    columns = [
        pa.array([1, 2, 3, 4]),
        pa.array(["foo", "bar", "baz", None]),
        pa.array([True, None, False, True]),
    ]
    batch = pa.record_batch(columns, names=["f0", "f1", "f2"])

    # Write the batch into an in-memory IPC stream buffer.
    buffer = pa.BufferOutputStream()
    with pa.ipc.new_stream(buffer, batch.schema) as writer:
        writer.write_batch(batch)

    expected = {
        "f0": [1, 2, 3, 4],
        "f1": ["foo", "bar", "baz", None],
        "f2": [True, None, False, True],
    }
    assert PyArrowSerializer().loads(buffer.getvalue()).to_pydict() == expected
def test_json_raises_serialization_error_on_dump_error(json_serializer):
    """Dumping a non-JSON-serializable object must raise SerializationError."""
    unserializable = object()
    with pytest.raises(SerializationError):
        json_serializer.dumps(unserializable)
|
0 commit comments