Skip to content

Commit 624d7f9

Browse files
authored
Merge pull request #1 from Finc3/feat/raise_null_type
Raise exception on null type
2 parents (c81b96c + e552144), commit 624d7f9

File tree

4 files changed

+168
-57
lines changed

4 files changed

+168
-57
lines changed

bindings/python/pymongoarrow/context.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ def __init__(self, schema, builder_map, codec_options=None):
7373
self.tzinfo = None
7474

7575
self.raise_on_type_error = schema.raise_on_type_error if schema is not None else False
76+
self.raise_on_type_null = schema.raise_on_type_null if schema is not None else False
7677

7778
@classmethod
7879
def from_schema(cls, schema, codec_options=DEFAULT_CODEC_OPTIONS):

bindings/python/pymongoarrow/lib.pyx

Lines changed: 71 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -287,26 +287,26 @@ cdef void process_raw_bson_stream(const uint8_t * docstream, size_t length, obje
287287
else:
288288
if context.raise_on_type_error:
289289
raise PyMongoArrowError(f"Type mismatch! {key} is not an int32")
290-
else:
291-
# Use append (not append_raw) to surface overflow errors.
292-
int32_builder.append(val64)
290+
int32_builder.append(val64)
293291

294292
elif value_t == BSON_TYPE_DOUBLE:
295293
# Treat nan as null.
296294
val = bson_iter_as_double(&doc_iter)
297295
if isnan(val):
298296
if context.raise_on_type_error:
299297
raise PyMongoArrowError(f"Type mismatch! {key} is not an int32")
300-
else:
301-
int32_builder.append_null()
298+
int32_builder.append_null()
302299
else:
303300
# Use append (not append_raw) to surface overflow errors.
304301
int32_builder.append(bson_iter_as_int64(&doc_iter))
302+
elif value_t == BSON_TYPE_NULL:
303+
if context.raise_on_type_null:
304+
raise PyMongoArrowError(f"Null value for {key}!")
305+
int32_builder.append_null()
305306
else:
306307
if context.raise_on_type_error:
307308
raise PyMongoArrowError(f"Type mismatch! {key} is not an int32")
308-
else:
309-
int32_builder.append_null()
309+
int32_builder.append_null()
310310
elif ftype == BSON_TYPE_INT64:
311311
int64_builder = builder
312312
if (value_t == BSON_TYPE_INT64 or
@@ -319,44 +319,55 @@ cdef void process_raw_bson_stream(const uint8_t * docstream, size_t length, obje
319319
if isnan(val):
320320
if context.raise_on_type_error:
321321
raise PyMongoArrowError(f"Type mismatch! {key} is not an int64")
322-
else:
323-
int64_builder.append_null()
322+
int64_builder.append_null()
324323
else:
325324
int64_builder.append_raw(bson_iter_as_int64(&doc_iter))
325+
elif value_t == BSON_TYPE_NULL:
326+
if context.raise_on_type_null:
327+
raise PyMongoArrowError(f"Null value for {key}!")
328+
int64_builder.append_null()
326329
else:
327330
if context.raise_on_type_error:
328331
raise PyMongoArrowError(f"Type mismatch! {key} is not an int64")
329-
else:
330-
int64_builder.append_null()
332+
int64_builder.append_null()
331333
elif ftype == BSON_TYPE_OID:
332334
objectid_builder = builder
333335
if value_t == BSON_TYPE_OID:
334336
objectid_builder.append_raw(bson_iter_oid(&doc_iter))
337+
elif value_t == BSON_TYPE_NULL:
338+
if context.raise_on_type_null:
339+
raise PyMongoArrowError(f"Null value for {key}!")
340+
objectid_builder.append_null()
335341
else:
336342
if context.raise_on_type_error:
337343
raise PyMongoArrowError(f"Type mismatch! {key} is not an oid")
338-
else:
339-
objectid_builder.append_null()
344+
objectid_builder.append_null()
340345
elif ftype == BSON_TYPE_UTF8:
341346
string_builder = builder
342347
if value_t == BSON_TYPE_UTF8:
343348
bson_str = bson_iter_utf8(&doc_iter, &str_len)
344349
string_builder.append_raw(bson_str, str_len)
350+
elif value_t == BSON_TYPE_NULL:
351+
if context.raise_on_type_null:
352+
raise PyMongoArrowError(f"Null value for {key}!")
353+
string_builder.append_null()
345354
else:
346355
if context.raise_on_type_error:
347356
raise PyMongoArrowError(f"Type mismatch! {key} is not an UTF8")
348-
else:
349-
string_builder.append_null()
357+
string_builder.append_null()
350358
elif ftype == BSON_TYPE_CODE:
351359
code_builder = builder
352360
if value_t == BSON_TYPE_CODE:
353361
bson_str = bson_iter_code(&doc_iter, &str_len)
354362
code_builder.append_raw(bson_str, str_len)
363+
elif value_t == BSON_TYPE_NULL:
364+
if context.raise_on_type_null:
365+
raise PyMongoArrowError(f"Null value for {key}!")
366+
code_builder.append_null()
355367
else:
356368
if context.raise_on_type_error:
357369
raise PyMongoArrowError(f"Type mismatch! {key} is not an code")
358-
else:
359-
code_builder.append_null()
370+
code_builder.append_null()
360371
elif ftype == BSON_TYPE_DECIMAL128:
361372
dec128_builder = builder
362373
if value_t == BSON_TYPE_DECIMAL128:
@@ -373,86 +384,109 @@ cdef void process_raw_bson_stream(const uint8_t * docstream, size_t length, obje
373384
else:
374385
if context.raise_on_type_error:
375386
raise PyMongoArrowError(f"Type mismatch! {key} is not an bigEndian not supported")
376-
else:
377-
# We do not support big-endian systems.
378-
dec128_builder.append_null()
387+
# We do not support big-endian systems.
388+
dec128_builder.append_null()
389+
elif value_t == BSON_TYPE_NULL:
390+
if context.raise_on_type_null:
391+
raise PyMongoArrowError(f"Null value for {key}!")
392+
dec128_builder.append_null()
379393
else:
380394
if context.raise_on_type_error:
381395
raise PyMongoArrowError(f"Type mismatch! {key} is not an bigEndian")
382-
else:
383-
dec128_builder.append_null()
396+
dec128_builder.append_null()
384397
elif ftype == BSON_TYPE_DOUBLE:
385398
double_builder = builder
386399
if (value_t == BSON_TYPE_DOUBLE or
387400
value_t == BSON_TYPE_BOOL or
388401
value_t == BSON_TYPE_INT32 or
389402
value_t == BSON_TYPE_INT64):
390403
double_builder.append_raw(bson_iter_as_double(&doc_iter))
404+
elif value_t == BSON_TYPE_NULL:
405+
if context.raise_on_type_null:
406+
raise PyMongoArrowError(f"Null value for {key}!")
407+
double_builder.append_null()
391408
else:
392409
if context.raise_on_type_error:
393410
raise PyMongoArrowError(f"Type mismatch! {key} is not an double")
394-
else:
395-
double_builder.append_null()
411+
double_builder.append_null()
396412
elif ftype == ARROW_TYPE_DATE32:
397413
date32_builder = builder
398414
if value_t == BSON_TYPE_DATE_TIME:
399415
date32_builder.append_raw(bson_iter_date_time(&doc_iter))
416+
elif value_t == BSON_TYPE_NULL:
417+
if context.raise_on_type_null:
418+
raise PyMongoArrowError(f"Null value for {key}!")
419+
date32_builder.append_null()
400420
else:
401421
if context.raise_on_type_error:
402422
raise PyMongoArrowError(f"Type mismatch! {key} is not a data32")
403-
else:
404-
date32_builder.append_null()
423+
date32_builder.append_null()
405424
elif ftype == ARROW_TYPE_DATE64:
406425
date64_builder = builder
407426
if value_t == BSON_TYPE_DATE_TIME:
408427
date64_builder.append_raw(bson_iter_date_time(&doc_iter))
428+
elif value_t == BSON_TYPE_NULL:
429+
if context.raise_on_type_null:
430+
raise PyMongoArrowError(f"Null value for {key}!")
431+
date64_builder.append_null()
409432
else:
410433
if context.raise_on_type_error:
411434
raise PyMongoArrowError(f"Type mismatch! {key} is not a date64")
412-
else:
413-
date64_builder.append_null()
435+
date64_builder.append_null()
414436
elif ftype == BSON_TYPE_DATE_TIME:
415437
datetime_builder = builder
416438
if value_t == BSON_TYPE_DATE_TIME:
417439
datetime_builder.append_raw(bson_iter_date_time(&doc_iter))
440+
elif value_t == BSON_TYPE_NULL:
441+
if context.raise_on_type_null:
442+
raise PyMongoArrowError(f"Null value for {key}!")
443+
datetime_builder.append_null()
418444
else:
419445
if context.raise_on_type_error:
420446
raise PyMongoArrowError(f"Type mismatch! {key} is not a datetime")
421-
else:
422-
datetime_builder.append_null()
447+
datetime_builder.append_null()
423448
elif ftype == BSON_TYPE_BOOL:
424449
bool_builder = builder
425450
if value_t == BSON_TYPE_BOOL:
426451
bool_builder.append_raw(bson_iter_bool(&doc_iter))
452+
elif value_t == BSON_TYPE_NULL:
453+
if context.raise_on_type_null:
454+
raise PyMongoArrowError(f"Null value for {key}!")
455+
bool_builder.append_null()
427456
else:
428457
if context.raise_on_type_error:
429458
raise PyMongoArrowError(f"Type mismatch! {key} is not a bool")
430-
else:
431-
bool_builder.append_null()
459+
bool_builder.append_null()
432460
elif ftype == BSON_TYPE_DOCUMENT:
433461
doc_builder = builder
434462
if value_t == BSON_TYPE_DOCUMENT:
435463
bson_iter_document(&doc_iter, &val_buf_len, &val_buf)
436464
if val_buf_len <= 0:
437465
raise ValueError("Subdocument is invalid")
438466
doc_builder.append_raw(val_buf, val_buf_len)
467+
elif value_t == BSON_TYPE_NULL:
468+
if context.raise_on_type_null:
469+
raise PyMongoArrowError(f"Null value for {key}!")
470+
doc_builder.append_null()
439471
else:
440472
if context.raise_on_type_error:
441473
raise PyMongoArrowError(f"Type mismatch! {key} is not a document")
442-
else:
443-
doc_builder.append_null()
474+
doc_builder.append_null()
444475
elif ftype == BSON_TYPE_ARRAY:
445476
list_builder = builder
446477
if value_t == BSON_TYPE_ARRAY:
447478
bson_iter_array(&doc_iter, &val_buf_len, &val_buf)
448479
if val_buf_len <= 0:
449480
raise ValueError("Subarray is invalid")
450481
list_builder.append_raw(val_buf, val_buf_len)
482+
elif value_t == BSON_TYPE_NULL:
483+
if context.raise_on_type_null:
484+
raise PyMongoArrowError(f"Null value for {key}!")
485+
list_builder.append_null()
451486
else:
452487
if context.raise_on_type_error:
453488
raise PyMongoArrowError(f"Type mismatch! {key} is not an array")
454-
else:
455-
list_builder.append_null()
489+
list_builder.append_null()
456490
elif ftype == BSON_TYPE_BINARY:
457491
binary_builder = builder
458492
if value_t == BSON_TYPE_BINARY:
@@ -461,8 +495,7 @@ cdef void process_raw_bson_stream(const uint8_t * docstream, size_t length, obje
461495
if subtype != binary_builder._subtype:
462496
if context.raise_on_type_error:
463497
raise PyMongoArrowError(f"Type mismatch! {key} binary subtype does not match")
464-
else:
465-
binary_builder.append_null()
498+
binary_builder.append_null()
466499
else:
467500
binary_builder.append_raw(<char*>val_buf, val_buf_len)
468501
else:

bindings/python/pymongoarrow/schema.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,14 @@ class Schema:
3838
corresponding type-identifiers, see :ref:`type support`.
3939
"""
4040

41-
def __init__(self, schema, raise_on_type_error=False):
41+
def __init__(self, schema, raise_on_type_null=False, raise_on_type_error=False):
4242
"""Create a :class:`~pymongoarrow.schema.Schema` instance from a
4343
mapping or an iterable.
4444
4545
:Parameters:
4646
- `schema`: A mapping.
47+
- `raise_on_type_null`: If True, raise an error if a field is null.
48+
- `raise_on_type_error`: If True, raise an error if a field is not of the expected type.
4749
"""
4850
if isinstance(schema, abc.Mapping):
4951
normed = type(self)._normalize_mapping(schema)
@@ -52,6 +54,7 @@ def __init__(self, schema, raise_on_type_error=False):
5254
raise ValueError(msg)
5355
self.typemap = normed
5456
self.raise_on_type_error = raise_on_type_error
57+
self.raise_on_type_null = raise_on_type_null
5558

5659
def __iter__(self):
5760
yield from self.typemap

0 commit comments

Comments
 (0)