Skip to content

Commit 09c6c3e

Browse files
committed
INTPYTHON-575 Fix handling of empty embedded arrays
1 parent cbbab71 commit 09c6c3e

File tree

4 files changed

+40
-2
lines changed

4 files changed

+40
-2
lines changed

bindings/python/pymongoarrow/context.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def _parse_builder_map(builder_map):
7575
elif value.type_marker == _BsonArrowTypes.array.value:
7676
child_name = key + "[]"
7777
to_remove.append(child_name)
78-
child = builder_map[child_name]
78+
child = builder_map.get(child_name, [])
7979
builder_map[key] = ListArray.from_arrays(value.finish(), child)
8080
else:
8181
builder_map[key] = value.finish()

bindings/python/test/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import atexit
15+
1416
import pymongo
1517

1618

@@ -39,6 +41,7 @@ def init(self):
3941

4042
if self.connected:
4143
self.client = self.get_client()
44+
atexit.register(self.client.close)
4245

4346

4447
client_context = ClientContext()

bindings/python/test/test_arrow.py

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import io
1516
import json
1617
import tempfile
1718
import unittest
@@ -22,8 +23,9 @@
2223
from test.utils import AllowListEventListener, NullsTestMixin
2324

2425
import pyarrow as pa
26+
import pyarrow.json
2527
import pymongo
26-
from bson import Binary, Code, CodecOptions, Decimal128, ObjectId
28+
from bson import Binary, Code, CodecOptions, Decimal128, ObjectId, json_util
2729
from pyarrow import (
2830
Table,
2931
bool_,
@@ -1021,6 +1023,35 @@ def test_decimal128(self):
10211023
coll_data = list(self.coll.find({}))
10221024
assert coll_data[0]["data"] == Decimal128(a)
10231025

1026+
def test_empty_embedded_array(self):
1027+
# From INTPYTHON-575.
1028+
self.coll.drop()
1029+
1030+
self.coll.insert_many(
1031+
[{"_id": 1, "foo": {"bar": ["1", "2"]}}, {"_id": 2, "foo": {"bar": []}}]
1032+
)
1033+
1034+
# get document out of mongo, put it in a file and read it with pyarrow and write it to parquet.
1035+
doc1 = self.coll.find_one({"_id": 1})
1036+
string1 = json_util.dumps(doc1, indent=2)
1037+
file1 = io.BytesIO(bytes(string1, encoding="utf-8"))
1038+
papatable1 = pyarrow.json.read_json(file1)
1039+
write_table(papatable1, io.BytesIO())
1040+
1041+
# read document with pymongoarrow and write it to parquet.
1042+
pmapatable1 = find_arrow_all(self.coll, {"_id": {"$eq": 1}})
1043+
write_table(pmapatable1, io.BytesIO())
1044+
1045+
doc2 = self.coll.find_one({"_id": 2})
1046+
string2 = json_util.dumps(doc2, indent=2)
1047+
file2 = io.BytesIO(bytes(string2, encoding="utf-8"))
1048+
papatable2 = pyarrow.json.read_json(file2)
1049+
write_table(papatable2, io.BytesIO())
1050+
1051+
pmapatable2 = find_arrow_all(self.coll, {"_id": {"$eq": 2}})
1052+
assert pmapatable2.to_pylist()[0] == doc2
1053+
write_table(pmapatable2, io.BytesIO())
1054+
10241055

10251056
class TestArrowExplicitApi(ArrowApiTestMixin, unittest.TestCase):
10261057
def run_find(self, *args, **kwargs):

bindings/python/test/utils.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,10 @@ def setUp(self):
144144
self.cmd_listener.reset()
145145
self.getmore_listener.reset()
146146

147+
@classmethod
148+
def tearDownClass(cls):
149+
cls.client.close()
150+
147151
def assertType(self, obj1, arrow_type):
148152
if isinstance(obj1, pa.ChunkedArray):
149153
if "storage_type" in dir(arrow_type) and obj1.type != arrow_type:

0 commit comments

Comments
 (0)