Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions bindings/python/pymongoarrow/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, schema, codec_options=None):
from pymongoarrow.lib import BuilderManager

self.manager = BuilderManager(schema_map, self.schema is not None, self.tzinfo)
self.schema_map = schema_map

def process_bson_stream(self, stream):
self.manager.process_bson_stream(stream, len(stream))
Expand All @@ -59,8 +60,15 @@ def _parse_builder_map(builder_map):
# Traverse the builder map right to left.
for key, value in reversed(builder_map.items()):
if value.type_marker == _BsonArrowTypes.document.value:
names = value.finish()
full_names = [f"{key}.{name}" for name in names]
names = []
full_names = []
for candidate in list(builder_map):
if candidate.startswith(key + "."):
name = candidate[len(key) + 1 :]
if "." in name or "[" in name:
continue
names.append(name)
full_names.append(candidate)
arrs = [builder_map[c] for c in full_names]
builder_map[key] = StructArray.from_arrays(arrs, names=names)
to_remove.extend(full_names)
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pymongoarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ cdef class BuilderManager:
# We only use the doc_iter for binary arrays, which are handled already.
self.get_builder(name, ftype, <bson_iter_t *>nullptr)

cdef _ArrayBuilderBase get_builder(self, cstring key, bson_type_t value_t, bson_iter_t * doc_iter) except *:
cdef _ArrayBuilderBase get_builder(self, cstring key, bson_type_t value_t, bson_iter_t * doc_iter):
cdef _ArrayBuilderBase builder = None
cdef bson_subtype_t subtype
cdef const uint8_t *val_buf = NULL
Expand Down
26 changes: 26 additions & 0 deletions bindings/python/test/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,32 @@ def test_string_bool(self):
),
)

def test_schema_missing_field(self):
    """Check that a schema field absent from stored subdocuments reads as None.

    A single document is inserted whose ``list_field`` holds one subdocument
    with only ``name`` and ``test``.  The schema additionally declares
    ``test_test`` for the list elements; both ``find_arrow_all`` and
    ``aggregate_arrow_all`` are expected to return that field as ``None``.
    """
    self.coll.drop()
    document = {
        "_id": ObjectId("000000000000000000000013"),
        "list_field": [{"name": "Test1", "test": "Test2"}],
    }
    self.coll.insert_one(document)

    element_fields = {
        "name": pa.string(),
        "test": pa.string(),
        # Declared in the schema only; no stored document has this field.
        "test_test": pa.string(),
    }
    schema = Schema({"_id": ObjectId, "list_field": [element_fields]})

    expected = [[{"name": "Test1", "test": "Test2", "test_test": None}]]

    # find takes a filter document, aggregate takes a pipeline list.
    readers = ((find_arrow_all, {}), (aggregate_arrow_all, []))
    for reader, query in readers:
        table = reader(self.coll, query, schema=schema).drop(["_id"])
        self.assertEqual(table["list_field"].to_pylist(), expected)

def test_auto_schema_nested(self):
# Create table with random data of various types.
_, data = self._create_nested_data()
Expand Down
Loading