Skip to content

Commit a46c29c

Browse files
authored
ARROW-53 Add Support for Boolean Type (#51)
1 parent 888cbf8 commit a46c29c

File tree

7 files changed

+90
-5
lines changed

7 files changed

+90
-5
lines changed

bindings/python/docs/source/changelog.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Changes in Version 0.3.0
66
- Support for `ObjectId` `bson` type.
77
- Improve error message when schema contains an unsupported type.
88
- Add support for BSON string type.
9+
- Add support for BSON boolean type.
910

1011
Changes in Version 0.2.0
1112
------------------------

bindings/python/docs/source/supported_types.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Support for additional types will be added in subsequent releases.
1919
- :class:`py.str`, an instance of :class:`pyarrow.string`
2020
* - ObjectId
2121
- :class:`py.bytes`, :class:`bson.ObjectId`, an instance of :class:`pymongoarrow.types.ObjectIdType`, an instance of :class:`pyarrow.FixedSizeBinaryScalar`
22+
* - Boolean
23+
- :class:`py.bool`, an instance of :class:`pyarrow.bool_`
2224
* - 64-bit binary floating point
2325
- :class:`py.float`, an instance of :meth:`pyarrow.float64`
2426
* - 32-bit integer

bindings/python/pymongoarrow/context.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
from bson.codec_options import DEFAULT_CODEC_OPTIONS
1515

1616
from pyarrow import timestamp, Table
17-
from pymongoarrow.lib import Int32Builder, Int64Builder, DoubleBuilder, DatetimeBuilder, ObjectIdBuilder, StringBuilder
17+
from pymongoarrow.lib import (Int32Builder, Int64Builder, DoubleBuilder,
18+
DatetimeBuilder, ObjectIdBuilder,
19+
StringBuilder, BoolBuilder)
1820
from pymongoarrow.types import _get_internal_typemap, _BsonArrowTypes
1921

2022

@@ -25,6 +27,7 @@
2527
_BsonArrowTypes.datetime: DatetimeBuilder,
2628
_BsonArrowTypes.objectid: ObjectIdBuilder,
2729
_BsonArrowTypes.string: StringBuilder,
30+
_BsonArrowTypes.bool: BoolBuilder,
2831
}
2932

3033

bindings/python/pymongoarrow/lib.pyx

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def process_bson_stream(bson_stream, context):
7272
t_datetime = _BsonArrowTypes.datetime
7373
t_oid = _BsonArrowTypes.objectid
7474
t_string = _BsonArrowTypes.string
75+
t_bool = _BsonArrowTypes.bool
7576
builder_map = context.builder_map
7677

7778
# initialize count to current length of builders
@@ -129,6 +130,11 @@ def process_bson_stream(bson_stream, context):
129130
builder.append(bson_iter_date_time(&doc_iter))
130131
else:
131132
builder.append_null()
133+
elif ftype == t_bool:
134+
if value_t == BSON_TYPE_BOOL:
135+
builder.append(bson_iter_bool(&doc_iter))
136+
else:
137+
builder.append_null()
132138
else:
133139
raise PyMongoArrowError('unknown ftype {}'.format(ftype))
134140
count += 1
@@ -330,3 +336,31 @@ cdef class DatetimeBuilder(_ArrayBuilderBase):
330336

331337
cdef shared_ptr[CTimestampBuilder] unwrap(self):
332338
return self.builder
339+
340+
341+
cdef class BoolBuilder(_ArrayBuilderBase):
    """Array builder wrapping an Arrow ``CBooleanBuilder``.

    Carries ``_BsonArrowTypes.bool`` as its ``type_marker``.
    """
    type_marker = _BsonArrowTypes.bool

    cdef:
        shared_ptr[CBooleanBuilder] builder

    def __cinit__(self, MemoryPool memory_pool=None):
        # Create the underlying C++ builder on the pool that
        # ``maybe_unbox_memory_pool`` resolves from ``memory_pool``.
        self.builder.reset(
            new CBooleanBuilder(maybe_unbox_memory_pool(memory_pool)))

    cpdef append_null(self):
        """Append a null entry to the array being built."""
        self.builder.get().AppendNull()

    def __len__(self):
        """Return the length reported by the underlying builder."""
        return self.builder.get().length()

    cpdef append(self, value):
        """Append ``value`` after casting it to a C boolean."""
        self.builder.get().Append(<c_bool>value)

    cpdef finish(self):
        """Finish the builder and return the result as a pyarrow array."""
        cdef shared_ptr[CArray] finished
        # The C++ ``Finish`` call does not need the GIL.
        with nogil:
            self.builder.get().Finish(&finished)
        return pyarrow_wrap_array(finished)

    cdef shared_ptr[CBooleanBuilder] unwrap(self):
        # Expose the shared pointer to the underlying C++ builder.
        return self.builder

bindings/python/pymongoarrow/types.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from bson import Int64, ObjectId
1818

19-
from pyarrow import timestamp, binary, float64, int64, int32, string
19+
from pyarrow import timestamp, binary, float64, int64, int32, string, bool_
2020
from pyarrow import PyExtensionType
2121
from pyarrow import DataType as _ArrowDataType
2222
import pyarrow.types as _atypes
@@ -29,6 +29,7 @@ class _BsonArrowTypes(enum.Enum):
2929
int64 = 4
3030
objectid = 5
3131
string = 6
32+
bool = 7
3233

3334

3435
# Custom Extension Types.
@@ -59,6 +60,7 @@ def _is_objectid(obj):
5960
datetime: lambda _: timestamp('ms'), # TODO: add tzinfo support
6061
ObjectId: lambda _: ObjectIdType(),
6162
str: lambda: string(),
63+
bool: lambda: bool_(),
6264
}
6365

6466

@@ -69,6 +71,7 @@ def _is_objectid(obj):
6971
_atypes.is_timestamp: _BsonArrowTypes.datetime,
7072
_is_objectid: _BsonArrowTypes.objectid,
7173
_atypes.is_string: _BsonArrowTypes.string,
74+
_atypes.is_boolean: _BsonArrowTypes.bool,
7275
}
7376

7477

bindings/python/test/test_bson.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pymongoarrow.context import PyMongoArrowContext
2020
from pymongoarrow.lib import process_bson_stream
2121
from pymongoarrow.schema import Schema
22-
from pymongoarrow.types import int32, int64, string, ObjectId, ObjectIdType
22+
from pymongoarrow.types import int32, int64, string, ObjectId, ObjectIdType, bool_
2323

2424

2525
class TestBsonToArrowConversionBase(TestCase):
@@ -154,3 +154,22 @@ def test_object_id_type(self):
154154
arr = pa.ExtensionArray.from_storage(ObjectIdType(), storage_array)
155155
result = self.serialize_array(arr)
156156
assert result.type._type_marker == ObjectIdType._type_marker
157+
158+
159+
class TestBooleanType(TestBsonToArrowConversionBase):
    """BSON-to-Arrow conversion test for a field declared as boolean."""

    def setUp(self):
        # Single-field schema whose 'data' column is an Arrow boolean.
        schema = Schema({'data': bool_()})
        self.schema = schema
        self.context = PyMongoArrowContext.from_schema(schema)

    def test_simple(self):
        # The non-boolean inputs (19 and "string") are expected to come
        # back as nulls; real booleans pass through unchanged.
        values = [True, False, 19, "string", False, True]
        expected = [True, False, None, None, False, True]
        docs = [{'data': value} for value in values]
        self._run_test(docs, {'data': expected})

bindings/python/test/test_builders.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
from unittest import TestCase
1717
from bson.objectid import ObjectId
1818

19-
from pyarrow import Array, timestamp, int32, int64
19+
from pyarrow import Array, timestamp, int32, int64, bool_
2020

2121
from pymongoarrow.lib import (
2222
DatetimeBuilder, DoubleBuilder, Int32Builder, Int64Builder,
23-
ObjectIdBuilder, StringBuilder)
23+
ObjectIdBuilder, StringBuilder, BoolBuilder)
2424

2525

2626
class TestIntBuildersMixin:
@@ -146,3 +146,26 @@ def test_simple(self):
146146
self.assertEqual(len(arr), 5)
147147
self.assertEqual(
148148
arr.to_pylist(), values + [None])
149+
150+
151+
class TestBoolBuilderMixin:
    """Shared checks for a boolean array builder.

    The concrete test case must set two attributes in ``setUp``:
    ``self.builder_cls`` (the builder class under test) and
    ``self.data_type`` (the Arrow type expected of the finished array).
    """

    def test_simple(self):
        # Instantiate through ``builder_cls`` so the builder configured by
        # the concrete test case is the one exercised, rather than a
        # hard-coded class.
        builder = self.builder_cls()
        builder.append(False)
        builder.append_values([True, False, True, False, True, False])
        builder.append_null()
        arr = builder.finish()

        self.assertIsInstance(arr, Array)
        # One explicit null plus seven appended booleans.
        self.assertEqual(arr.null_count, 1)
        self.assertEqual(len(arr), 8)
        self.assertEqual(
            arr.to_pylist(),
            [False, True, False, True, False, True, False, None])
        self.assertEqual(arr.type, self.data_type)
166+
167+
168+
class TestBoolBuilder(TestCase, TestBoolBuilderMixin):
    """Runs the boolean builder checks with ``BoolBuilder``."""

    def setUp(self):
        # Arrow type the finished array is compared against, and the
        # builder class under test.
        self.data_type = bool_()
        self.builder_cls = BoolBuilder

0 commit comments

Comments
 (0)