Skip to content

Commit ca88c6e

Browse files
authored
ARROW-79 Handling PyMongo errors in PyMongoArrow Writing Support (#76)
1 parent 0ecd3df commit ca88c6e

File tree

3 files changed: +52 additions, -17 deletions (lines changed)

bindings/python/pymongoarrow/api.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
import warnings
1515

1616
import numpy as np
17+
import pymongo.errors
1718
from bson import encode
1819
from bson.raw_bson import RawBSONDocument
1920
from numpy import ndarray
@@ -262,7 +263,11 @@ def _transform_bwe(bwe, offset):
262263
bwe["nInserted"] += offset
263264
for i in bwe["writeErrors"]:
264265
i["index"] += offset
265-
return bwe
266+
return {
267+
"writeErrors": bwe["writeErrors"],
268+
"nInserted": bwe["nInserted"],
269+
"writeConcernErrors": bwe["writeConcernErrors"],
270+
}
266271

267272

268273
def _tabular_generator(tabular):
@@ -336,7 +341,14 @@ def write(collection, tabular):
336341
collection.insert_many(cur_batch)
337342
except BulkWriteError as bwe:
338343
raise ArrowWriteError(_transform_bwe(dict(bwe.details), cur_offset)) from bwe
339-
344+
except pymongo.errors.PyMongoError as pme:
345+
raise ArrowWriteError(
346+
{
347+
"writeErrors": [{"errmsg": str(pme), "index": cur_offset}],
348+
"nInserted": cur_offset,
349+
"writeConcernErrors": [],
350+
}
351+
) from pme
340352
results["insertedCount"] += i
341353
cur_offset += i
342354

bindings/python/pymongoarrow/errors.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,18 +33,17 @@ def __init__(self, details):
3333
def details(self):
3434
"""Details for the error.
3535
36-
It is a dictionary of key-value pairs giving diagnostic information about what went wrong. To see the entire dictionary simply use `print(awe.details)`.
36+
It is a dictionary of key-value pairs giving diagnostic information about what went wrong.
37+
To see the entire dictionary simply use `print(awe.details)`.
3738
3839
Details will have the following format:
3940
{
4041
'writeErrors': [...],
4142
'writeConcernErrors': [...],
4243
'nInserted': ...,
43-
'nUpserted': ...,
44-
'nMatched': ...,
45-
'nModified': ...,
46-
'nRemoved': ...,
47-
'upserted': [...]
4844
}
45+
46+
If the error was caused by a PyMongo exception, then you can access that exception using the
47+
``__cause__`` attribute.
4948
"""
5049
return self._details

bindings/python/test/test_arrow.py

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
from pyarrow import schema as ArrowSchema
2424
from pyarrow import string, timestamp
2525
from pyarrow.parquet import read_table, write_table
26-
from pymongo import DESCENDING, WriteConcern
26+
from pymongo import DESCENDING, MongoClient, WriteConcern
2727
from pymongo.collection import Collection
2828
from pymongoarrow.api import Schema, aggregate_arrow_all, find_arrow_all, write
2929
from pymongoarrow.errors import ArrowWriteError
@@ -202,14 +202,38 @@ def test_write_error(self):
202202
{"_id": [i for i in range(10001)] * 2, "data": [i * 2 for i in range(10001)] * 2},
203203
ArrowSchema(schema),
204204
)
205-
with self.assertRaises(ArrowWriteError):
206-
try:
207-
self.round_trip(data, Schema(schema))
208-
except ArrowWriteError as awe:
209-
self.assertEqual(
210-
10001, awe.details["writeErrors"][0]["index"], awe.details["nInserted"]
211-
)
212-
raise awe
205+
with self.assertRaises(ArrowWriteError) as awe:
206+
self.round_trip(data, Schema(schema))
207+
self.assertEqual(
208+
10001,
209+
awe.exception.details["writeErrors"][0]["index"],
210+
awe.exception.details["nInserted"],
211+
)
212+
self.assertEqual(
213+
awe.exception.details.keys(), {"nInserted", "writeConcernErrors", "writeErrors"}
214+
)
215+
216+
def test_pymongo_error(self):
217+
schema = {"_id": int32(), "data": int64()}
218+
data = Table.from_pydict(
219+
{"_id": [i for i in range(10001)] * 2, "data": [i * 2 for i in range(10001)] * 2},
220+
ArrowSchema(schema),
221+
)
222+
223+
with self.assertRaises(ArrowWriteError) as exc:
224+
write(
225+
MongoClient(
226+
host="somedomainthatdoesntexist.org",
227+
port=123456789,
228+
serverSelectionTimeoutMS=10,
229+
).pymongoarrow_test.get_collection(
230+
"test", write_concern=WriteConcern(w="majority")
231+
),
232+
data,
233+
)
234+
self.assertEqual(
235+
exc.exception.details.keys(), {"nInserted", "writeConcernErrors", "writeErrors"}
236+
)
213237

214238
def test_write_schema_validation(self):
215239
schema = {

0 commit comments

Comments
 (0)