Skip to content

Commit a984af8

Browse files
authored
INTPYTHON-575 Fix handling of empty embedded arrays (#296)
1 parent 20831db commit a984af8

File tree

5 files changed

+50
-56
lines changed

5 files changed

+50
-56
lines changed

bindings/python/pymongoarrow/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _parse_builder_map(builder_map):
7575
elif value.type_marker == _BsonArrowTypes.array.value:
7676
child_name = key + "[]"
7777
to_remove.append(child_name)
78-
child = builder_map[child_name]
78+
child = builder_map.get(child_name, [])
7979
builder_map[key] = ListArray.from_arrays(value.finish(), child)
8080
else:
8181
builder_map[key] = value.finish()

bindings/python/test/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
1415
import pymongo
1516

1617

@@ -42,4 +43,3 @@ def init(self):
4243

4344

4445
client_context = ClientContext()
45-
client_context.init()

bindings/python/test/conftest.py

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2023-present MongoDB, Inc.
1+
# Copyright 2025-present MongoDB, Inc.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -11,59 +11,18 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
import numpy as np
15-
import pandas as pd
16-
import pytest
17-
18-
# Fixtures for use with Pandas extension types.
19-
20-
21-
@pytest.fixture
22-
def data_for_twos(dtype):
23-
return pd.array(np.ones(100), dtype=dtype)
24-
25-
26-
@pytest.fixture
27-
def na_value():
28-
return np.nan
29-
30-
31-
@pytest.fixture
32-
def na_cmp():
33-
def cmp(a, b):
34-
return np.isnan(a) and np.isnan(b)
35-
36-
return cmp
37-
38-
39-
@pytest.fixture(params=[True, False])
40-
def box_in_series(request):
41-
"""Whether to box the data in a Series"""
42-
return request.param
43-
44-
45-
@pytest.fixture(params=[True, False])
46-
def as_array(request):
47-
"""
48-
Boolean fixture to support ExtensionDtype _from_sequence method testing.
49-
"""
50-
return request.param
14+
from test import client_context
5115

16+
import pytest
5217

53-
@pytest.fixture(params=["ffill", "bfill"])
54-
def fillna_method(request):
55-
"""
56-
Parametrized fixture giving method parameters 'ffill' and 'bfill' for
57-
Series.fillna(method=<method>) testing.
58-
"""
59-
return request.param
18+
pytest_plugins = [
19+
"pandas.tests.extension.conftest",
20+
]
6021

6122

62-
@pytest.fixture
63-
def invalid_scalar(data):
64-
"""
65-
A scalar that *cannot* be held by this ExtensionArray.
66-
The default should work for most subclasses, but is not guaranteed.
67-
If the array can hold any item (i.e. object dtype), then use pytest.skip.
68-
"""
69-
return object.__new__(object)
23+
@pytest.fixture(autouse=True, scope="session")
24+
def client():
25+
client_context.init()
26+
yield
27+
if client_context.client:
28+
client_context.client.close()

bindings/python/test/test_arrow.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import io
1516
import json
1617
import tempfile
1718
import unittest
@@ -22,8 +23,9 @@
2223
from test.utils import AllowListEventListener, NullsTestMixin
2324

2425
import pyarrow as pa
26+
import pyarrow.json
2527
import pymongo
26-
from bson import Binary, Code, CodecOptions, Decimal128, ObjectId
28+
from bson import Binary, Code, CodecOptions, Decimal128, ObjectId, json_util
2729
from pyarrow import (
2830
Table,
2931
bool_,
@@ -1021,6 +1023,35 @@ def test_decimal128(self):
10211023
coll_data = list(self.coll.find({}))
10221024
assert coll_data[0]["data"] == Decimal128(a)
10231025

1026+
def test_empty_embedded_array(self):
1027+
# From INTPYTHON-575.
1028+
self.coll.drop()
1029+
1030+
self.coll.insert_many(
1031+
[{"_id": 1, "foo": {"bar": ["1", "2"]}}, {"_id": 2, "foo": {"bar": []}}]
1032+
)
1033+
1034+
# Fetch the document from MongoDB, serialize it to an in-memory JSON file, read it with pyarrow, and write it to Parquet.
1035+
doc1 = self.coll.find_one({"_id": 1})
1036+
string1 = json_util.dumps(doc1, indent=2)
1037+
file1 = io.BytesIO(bytes(string1, encoding="utf-8"))
1038+
papatable1 = pyarrow.json.read_json(file1)
1039+
write_table(papatable1, io.BytesIO())
1040+
1041+
# read document with pymongoarrow and write it to parquet.
1042+
pmapatable1 = find_arrow_all(self.coll, {"_id": {"$eq": 1}})
1043+
write_table(pmapatable1, io.BytesIO())
1044+
1045+
doc2 = self.coll.find_one({"_id": 2})
1046+
string2 = json_util.dumps(doc2, indent=2)
1047+
file2 = io.BytesIO(bytes(string2, encoding="utf-8"))
1048+
papatable2 = pyarrow.json.read_json(file2)
1049+
write_table(papatable2, io.BytesIO())
1050+
1051+
pmapatable2 = find_arrow_all(self.coll, {"_id": {"$eq": 2}})
1052+
assert pmapatable2.to_pylist()[0] == doc2
1053+
write_table(pmapatable2, io.BytesIO())
1054+
10241055

10251056
class TestArrowExplicitApi(ArrowApiTestMixin, unittest.TestCase):
10261057
def run_find(self, *args, **kwargs):

bindings/python/test/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ def setUp(self):
144144
self.cmd_listener.reset()
145145
self.getmore_listener.reset()
146146

147+
@classmethod
148+
def tearDownClass(cls):
149+
cls.client.close()
150+
147151
def assertType(self, obj1, arrow_type):
148152
if isinstance(obj1, pa.ChunkedArray):
149153
if "storage_type" in dir(arrow_type) and obj1.type != arrow_type:

0 commit comments

Comments
 (0)