@@ -83,7 +83,7 @@ _field_type_map = {
83
83
}
84
84
85
85
cdef extract_field_dtype(bson_iter_t * doc_iter, bson_iter_t * child_iter, bson_type_t value_t, context):
86
- """ Get the appropropriate data type for a specific field"""
86
+ """ Get the appropriate data type for a specific field"""
87
87
cdef const uint8_t * val_buf = NULL
88
88
cdef uint32_t val_buf_len = 0
89
89
cdef bson_subtype_t subtype
@@ -102,13 +102,15 @@ cdef extract_field_dtype(bson_iter_t * doc_iter, bson_iter_t * child_iter, bson_
102
102
elif value_t == BSON_TYPE_BINARY:
103
103
bson_iter_binary (doc_iter, & subtype, & val_buf_len, & val_buf)
104
104
field_type = BinaryType(subtype)
105
+ elif value_t == BSON_TYPE_NULL:
106
+ field_type = None
105
107
else :
106
108
raise PyMongoArrowError(' unknown value type {}' .format(value_t))
107
109
return field_type
108
110
109
111
110
112
cdef extract_document_dtype(bson_iter_t * doc_iter, context):
111
- """ Get the appropropriate data type for a sub document"""
113
+ """ Get the appropriate data type for a sub document"""
112
114
cdef const char * key
113
115
cdef bson_type_t value_t
114
116
cdef bson_iter_t child_iter
@@ -117,18 +119,24 @@ cdef extract_document_dtype(bson_iter_t * doc_iter, context):
117
119
key = bson_iter_key(doc_iter)
118
120
value_t = bson_iter_type(doc_iter)
119
121
field_type = extract_field_dtype(doc_iter, & child_iter, value_t, context)
120
- fields.append(field(key.decode(' utf-8' ), field_type))
121
- return struct (fields)
122
+ if field_type is not None :
123
+ fields.append(field(key.decode(' utf-8' ), field_type))
124
+ if fields:
125
+ return struct (fields)
126
+ return None
122
127
123
128
cdef extract_array_dtype(bson_iter_t * doc_iter, context):
    """Get the appropriate data type for a sub array.

    Advances ``doc_iter`` over the array elements and returns the result of
    ``extract_field_dtype`` for the first element whose result is not
    ``None``.

    :param doc_iter: iterator over the array's elements; it is advanced by
        this function.
    :param context: passed through unchanged to ``extract_field_dtype``.
    :returns: the first non-``None`` element data type, or ``None`` when the
        iterator is exhausted without producing one.
    """
    cdef bson_type_t value_t
    cdef bson_iter_t child_iter
    while bson_iter_next(doc_iter):
        value_t = bson_iter_type(doc_iter)
        field_type = extract_field_dtype(doc_iter, &child_iter, value_t, context)
        # A None result (e.g. for a BSON null element) carries no type
        # information, so keep scanning the remaining elements.
        if field_type is not None:
            return field_type
    return None
132
140
133
141
def process_bson_stream (bson_stream , context , arr_value_builder = None ):
134
142
""" Process a bson byte stream using a PyMongoArrowContext"""
@@ -198,10 +206,14 @@ def process_bson_stream(bson_stream, context, arr_value_builder=None):
198
206
elif builder_type == DocumentBuilder:
199
207
bson_iter_recurse(& doc_iter, & child_iter)
200
208
struct_dtype = extract_document_dtype(& child_iter, context)
209
+ if struct_dtype is None :
210
+ continue
201
211
builder = DocumentBuilder(struct_dtype, context.tzinfo)
202
212
elif builder_type == ListBuilder:
203
213
bson_iter_recurse(& doc_iter, & child_iter)
204
214
list_dtype = extract_array_dtype(& child_iter, context)
215
+ if list_dtype is None :
216
+ continue
205
217
list_dtype = list_(list_dtype)
206
218
builder = ListBuilder(list_dtype, context.tzinfo, value_builder = arr_value_builder)
207
219
elif builder_type == BinaryBuilder:
0 commit comments