@@ -487,7 +487,7 @@ def test_auto_schema_heterogeneous(self):
487
487
self .coll .insert_many (data )
488
488
for func in [find_arrow_all , aggregate_arrow_all ]:
489
489
out = func (self .coll , {} if func == find_arrow_all else []).drop (["_id" ])
490
- self .assertEqual (out ["a" ].to_pylist (), [1 , None , None , 4 ])
490
+ self .assertEqual (out ["a" ].to_pylist (), [1 , None , 1 , 4 ])
491
491
492
492
def test_auto_schema_tz (self ):
493
493
# Create table with random data of various types.
@@ -564,7 +564,7 @@ def test_malformed_embedded_documents(self):
564
564
dict (data = dict (a = 1 , b = True )),
565
565
dict (data = dict (a = 1 , b = True , c = "bar" )),
566
566
dict (data = dict (a = 1 )),
567
- dict (data = dict (a = True , b = False )),
567
+ dict (data = dict (a = "str" , b = False )),
568
568
]
569
569
self .coll .drop ()
570
570
self .coll .insert_many (data )
@@ -590,6 +590,56 @@ def test_mixed_subtype(self):
590
590
res = find_arrow_all (coll , {}, schema = schema )
591
591
self .assertEqual (res ["data" ].to_pylist (), [Binary (b"1" , 10 ), None ])
592
592
593
def _test_mixed_types_int(self, inttype):
    """Read mixed-type values of field "a" through an integer schema.

    Replaces the collection contents with documents whose "a" field holds
    an int, a float, booleans, NaN, None, a string, or is absent, reads
    them back with ``find_arrow_all`` using ``inttype`` as the type of "a",
    and asserts the resulting table equals the expected one.

    ``inttype`` is the Arrow integer type used for both the query schema
    and the expected table's schema.
    """
    stored = [
        {"a": 1},
        {"a": 2.9},  # Expected back as 2 (fractional part dropped).
        {"a": True},  # Expected back as 1.
        {"a": False},  # Expected back as 0.
        {"a": float("nan")},  # Expected back as null.
        {"a": None},  # Expected back as null.
        {},  # Field absent: expected back as null.
        {"a": "string"},  # Non-numeric: expected back as null.
    ]
    self.coll.delete_many({})
    self.coll.insert_many(stored)

    query_schema = Schema({"a": inttype})
    actual = find_arrow_all(self.coll, {}, projection={"_id": 0}, schema=query_schema)

    # One expected row per stored document, in insertion order.
    expected_rows = [
        {"a": 1},
        {"a": 2},
        {"a": 1},
        {"a": 0},
        {"a": None},
        {"a": None},
        {},
        {"a": None},
    ]
    arrow_schema = ArrowSchema([field("a", inttype)])
    wanted = Table.from_pylist(expected_rows, schema=arrow_schema)
    self.assertEqual(actual, wanted)
621
+
622
def test_mixed_types_int32(self):
    """Run the mixed-type checks for int32, then verify overflow is reported.

    A value of ``2 << 34`` does not fit in int32; reading it through an
    int32 schema must raise ``OverflowError`` whether it was stored as an
    integer or as a double.
    """
    self._test_mixed_types_int(int32())

    # First pass stores the oversized value as an integer, second as a double.
    for oversized in (2 << 34, float(2 << 34)):
        self.coll.delete_many({})
        self.coll.insert_one({"a": oversized})
        with self.assertRaises(OverflowError):
            find_arrow_all(self.coll, {}, projection={"_id": 0}, schema=Schema({"a": int32()}))
634
+
635
def test_mixed_types_int64(self):
    """Run the mixed-type checks for int64, then verify double overflow is reported.

    ``float(2 << 65)`` equals 2**66, which is outside the int64 range, so
    reading it through an int64 schema is expected to raise
    ``OverflowError``.
    """
    self._test_mixed_types_int(int64())
    # Test double overflowing int64
    self.coll.delete_many({})
    self.coll.insert_one({"a": float(2 << 65)})
    with self.assertRaises(OverflowError):
        # The schema must be int64 here: the previous int32 schema meant the
        # int64 overflow path was never exercised by this test.
        find_arrow_all(self.coll, {}, projection={"_id": 0}, schema=Schema({"a": int64()}))
642
+
593
643
594
644
class TestArrowExplicitApi (ArrowApiTestMixin , unittest .TestCase ):
595
645
def run_find (self , * args , ** kwargs ):
0 commit comments