|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
| 15 | +import io |
15 | 16 | import json
|
16 | 17 | import tempfile
|
17 | 18 | import unittest
|
|
22 | 23 | from test.utils import AllowListEventListener, NullsTestMixin
|
23 | 24 |
|
24 | 25 | import pyarrow as pa
|
| 26 | +import pyarrow.json |
25 | 27 | import pymongo
|
26 |
| -from bson import Binary, Code, CodecOptions, Decimal128, ObjectId |
| 28 | +from bson import Binary, Code, CodecOptions, Decimal128, ObjectId, json_util |
27 | 29 | from pyarrow import (
|
28 | 30 | Table,
|
29 | 31 | bool_,
|
@@ -1021,6 +1023,35 @@ def test_decimal128(self):
|
1021 | 1023 | coll_data = list(self.coll.find({}))
|
1022 | 1024 | assert coll_data[0]["data"] == Decimal128(a)
|
1023 | 1025 |
|
| 1026 | + def test_empty_embedded_array(self): |
| 1027 | + # From INTPYTHON-575. Covers an embedded array that is populated in one document (_id 1) and empty in another (_id 2). |
| 1028 | + self.coll.drop() |
| 1029 | + |
| 1030 | + self.coll.insert_many( |
| 1031 | + [{"_id": 1, "foo": {"bar": ["1", "2"]}}, {"_id": 2, "foo": {"bar": []}}] |
| 1032 | + ) |
| 1033 | + |
| 1034 | + # Baseline for _id 1: fetch the document with find_one, serialize it with json_util, parse it with pyarrow.json.read_json, and write the table to Parquet in an in-memory buffer. |
| 1035 | + doc1 = self.coll.find_one({"_id": 1}) |
| 1036 | + string1 = json_util.dumps(doc1, indent=2) |
| 1037 | + file1 = io.BytesIO(bytes(string1, encoding="utf-8")) |
| 1038 | + papatable1 = pyarrow.json.read_json(file1) |
| 1039 | + write_table(papatable1, io.BytesIO()) |
| 1040 | + |
| 1041 | + # Read the same document with pymongoarrow (find_arrow_all) and write that table to Parquet as well. |
| 1042 | + pmapatable1 = find_arrow_all(self.coll, {"_id": {"$eq": 1}}) |
| 1043 | + write_table(pmapatable1, io.BytesIO()) |
| 1044 | + # Repeat the pyarrow JSON round trip for _id 2, the document whose embedded array is empty. |
| 1045 | + doc2 = self.coll.find_one({"_id": 2}) |
| 1046 | + string2 = json_util.dumps(doc2, indent=2) |
| 1047 | + file2 = io.BytesIO(bytes(string2, encoding="utf-8")) |
| 1048 | + papatable2 = pyarrow.json.read_json(file2) |
| 1049 | + write_table(papatable2, io.BytesIO()) |
| 1050 | + # Read _id 2 with pymongoarrow: the single row must equal the find_one result, and the table must still be writable. |
| 1051 | + pmapatable2 = find_arrow_all(self.coll, {"_id": {"$eq": 2}}) |
| 1052 | + assert pmapatable2.to_pylist()[0] == doc2 |
| 1053 | + write_table(pmapatable2, io.BytesIO()) |
| 1054 | + |
1024 | 1055 |
|
1025 | 1056 | class TestArrowExplicitApi(ArrowApiTestMixin, unittest.TestCase):
|
1026 | 1057 | def run_find(self, *args, **kwargs):
|
|
0 commit comments