2 files changed: +49 -0 lines changed
@@ -254,6 +254,12 @@ cdef class BuilderManager:
254254 # For list children, the nulls are stored in the parent.
255255 key = field.encode(' utf-8' )
256256 parent_type = self .parent_types.get(key, None )
257+ # Check if the item was in our schema but never seen, and should have a parent.
258+ if parent_type is None and " ." in field:
259+ parent_key, _, _ = field.rpartition(' .' )
260+ self .parent_names[key] = parent_key.encode(' utf-8' )
261+ parent_type = BSON_TYPE_DOCUMENT
262+ # Add nulls according to parent type.
257263 if parent_type == BSON_TYPE_ARRAY:
258264 continue
259265 if parent_type == BSON_TYPE_DOCUMENT:
@@ -533,6 +533,49 @@ def test_schema_arrays_of_documents_with_nulls(self):
533533 expected = json .load (fid )
534534 assert df .to_pylist () == expected
535535
def test_schema_arrays_of_documents_orphaned_null(self):
    # Regression test for
    # https://github.com/mongodb-labs/mongo-arrow/issues/265.
    # Sub-fields declared in the schema but absent from a list element are
    # expected to be read back as None.
    collection = self.coll
    collection.delete_many({})

    # Schema: a list of structs, each holding a nested "field1" struct.
    field1_type = {
        "sub_field1": pa.string(),
        "sub_field2": pa.string(),
    }
    schema = Schema(
        {
            "_id": ObjectId,
            "test_list_struct": [{"field1": field1_type}],
        }
    )

    # The first element omits "sub_field2"; the second stores a plain string
    # where the schema declares a struct.
    partial_item = {"field1": {"sub_field1": "test_data"}}
    mismatched_item = {"field1": "test_data"}
    collection.insert_one(
        {
            "_id": ObjectId("000000000000000000000001"),
            "test_list_struct": [partial_item, mismatched_item],
        }
    )

    table = aggregate_arrow_all(collection, schema=schema, pipeline=[])
    first_row = table.to_pylist()[0]
    # Drop the id so only the nested payload is compared.
    first_row.pop("_id")

    expected = {
        "test_list_struct": [
            {"field1": {"sub_field1": "test_data", "sub_field2": None}},
            {"field1": {"sub_field1": None, "sub_field2": None}},
        ]
    }
    assert first_row == expected
578+
536579 def test_auto_schema_nested (self ):
537580 # Create table with random data of various types.
538581 _ , data = self ._create_nested_data ()
You can’t perform that action at this time.
0 commit comments