4
4
This module mainly tests the private helpers (prefixed with an underscore),
along with the ``nb_reduce`` function.
5
5
"""
6
6
7
+ from numba .typed import List as NumbaList
7
8
import numpy as np
8
9
import pytest
9
- from numba .typed import List as NumbaList
10
10
11
+ import pandas ._testing as tm
11
12
from pandas .core .nanops_numba import (
12
13
MIN_INT ,
13
14
NumbaReductionOps ,
14
15
_cast_to_timelike ,
15
16
_chunk_arr_into_arr_list ,
16
17
_get_initial_value ,
18
+ _nanvar_std_sem ,
17
19
_nb_reduce_arr_list_in_parallel ,
18
20
_nb_reduce_single_arr ,
19
- _nanvar_std_sem ,
20
21
_nullify_below_mincount ,
21
22
_reduce_chunked_results ,
22
23
_reduce_empty_array ,
23
24
nb_reduce ,
24
25
)
25
26
26
- import pandas ._testing as tm
27
-
28
27
29
28
class TestGetInitialValue :
30
29
"""Test the _get_initial_value private function."""
@@ -255,7 +254,7 @@ def test_parallel_with_mask(self):
255
254
# Create corresponding mask list
256
255
mask_list = NumbaList ()
257
256
mask_list .append (np .array ([False , True , False ])) # Mask middle element
258
- mask_list .append (np .array ([True , False , False ])) # Mask first element
257
+ mask_list .append (np .array ([True , False , False ])) # Mask first element
259
258
260
259
target = np .zeros (len (arr_list ), dtype = np .float64 )
261
260
result , counts = _nb_reduce_arr_list_in_parallel (
@@ -280,13 +279,18 @@ def test_single_chunk_reduction(self):
280
279
return_dtype = np .dtype ("float64" )
281
280
282
281
result , count = _reduce_chunked_results (
283
- "sum" , chunk_results , counts , final_length , return_dtype ,
284
- skipna = True , find_initial_value = True
282
+ "sum" ,
283
+ chunk_results ,
284
+ counts ,
285
+ final_length ,
286
+ return_dtype ,
287
+ skipna = True ,
288
+ find_initial_value = True ,
285
289
)
286
290
287
291
# Should reduce the chunk_results array itself
288
292
expected_result = np .array ([6.0 ]) # 1 + 2 + 3
289
- expected_count = np .array ([6 ]) # 2 + 2 + 2
293
+ expected_count = np .array ([6 ]) # 2 + 2 + 2
290
294
291
295
tm .assert_numpy_array_equal (result , expected_result )
292
296
tm .assert_numpy_array_equal (count , expected_count )
@@ -299,8 +303,13 @@ def test_no_chunking_needed(self):
299
303
return_dtype = np .dtype ("float64" )
300
304
301
305
result , count = _reduce_chunked_results (
302
- "sum" , chunk_results , counts , final_length , return_dtype ,
303
- skipna = True , find_initial_value = True
306
+ "sum" ,
307
+ chunk_results ,
308
+ counts ,
309
+ final_length ,
310
+ return_dtype ,
311
+ skipna = True ,
312
+ find_initial_value = True ,
304
313
)
305
314
306
315
# Should return results as-is (no further reduction needed)
@@ -371,7 +380,7 @@ def test_with_nan_values(self):
371
380
assert np .isfinite (result )
372
381
373
382
def test_complex_array (self ):
374
- arr = np .array ([1 + 2j , 3 + 4j ])
383
+ arr = np .array ([1 + 2j , 3 + 4j ])
375
384
result = _nanvar_std_sem (arr )
376
385
# Should handle complex numbers by processing real and imag parts
377
386
assert np .isfinite (result )
@@ -490,8 +499,11 @@ def test_nb_reduce_with_nans_skipna_true_multithreaded(
490
499
):
491
500
"""Test sum with NaN values and skipna=True on large array (multi-threaded)."""
492
501
result , count = nb_reduce (
493
- "sum" , large_2d_array_with_nans , axis = None ,
494
- skipna = True , multi_threading = True
502
+ "sum" ,
503
+ large_2d_array_with_nans ,
504
+ axis = None ,
505
+ skipna = True ,
506
+ multi_threading = True ,
495
507
)
496
508
497
509
# Compare with numpy nansum
@@ -503,8 +515,9 @@ def test_nb_reduce_with_nans_skipna_true_multithreaded(
503
515
504
516
def test_nb_reduce_with_nans_axis_0_multithreaded (self , large_2d_array_with_nans ):
505
517
"""Test sum with NaN values along axis 0 (multi-threaded)."""
506
- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 0 ,
507
- skipna = True , multi_threading = True )
518
+ result , count = nb_reduce (
519
+ "sum" , large_2d_array_with_nans , axis = 0 , skipna = True , multi_threading = True
520
+ )
508
521
509
522
# Compare with numpy nansum
510
523
expected = np .nansum (large_2d_array_with_nans , axis = 0 )
@@ -515,8 +528,9 @@ def test_nb_reduce_with_nans_axis_0_multithreaded(self, large_2d_array_with_nans
515
528
516
529
def test_nb_reduce_with_nans_axis_1_multithreaded (self , large_2d_array_with_nans ):
517
530
"""Test sum with NaN values along axis 1 (multi-threaded)."""
518
- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 1 ,
519
- skipna = True , multi_threading = True )
531
+ result , count = nb_reduce (
532
+ "sum" , large_2d_array_with_nans , axis = 1 , skipna = True , multi_threading = True
533
+ )
520
534
521
535
# Compare with numpy nansum
522
536
expected = np .nansum (large_2d_array_with_nans , axis = 1 )
@@ -528,12 +542,14 @@ def test_nb_reduce_with_nans_axis_1_multithreaded(self, large_2d_array_with_nans
528
542
def test_nb_reduce_single_thread_vs_multithread_consistency (self , large_2d_array ):
529
543
"""Test that single-threaded and multi-threaded results are identical."""
530
544
# Single-threaded result
531
- result_st , count_st = nb_reduce ("sum" , large_2d_array , axis = 0 ,
532
- multi_threading = False )
545
+ result_st , count_st = nb_reduce (
546
+ "sum" , large_2d_array , axis = 0 , multi_threading = False
547
+ )
533
548
534
549
# Multi-threaded result
535
- result_mt , count_mt = nb_reduce ("sum" , large_2d_array , axis = 0 ,
536
- multi_threading = True )
550
+ result_mt , count_mt = nb_reduce (
551
+ "sum" , large_2d_array , axis = 0 , multi_threading = True
552
+ )
537
553
538
554
# Results should be identical
539
555
tm .assert_numpy_array_equal (result_st , result_mt )
@@ -568,9 +584,14 @@ def test_nb_reduce_min_count_multithreaded(self, large_2d_array_with_nans):
568
584
"""Test min_count parameter with large array (multi-threaded)."""
569
585
min_count = 100 # Require at least 100 non-NaN values per column
570
586
571
- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 0 ,
572
- skipna = True , min_count = min_count ,
573
- multi_threading = True )
587
+ result , count = nb_reduce (
588
+ "sum" ,
589
+ large_2d_array_with_nans ,
590
+ axis = 0 ,
591
+ skipna = True ,
592
+ min_count = min_count ,
593
+ multi_threading = True ,
594
+ )
574
595
575
596
# Check that columns with insufficient data are NaN
576
597
valid_columns = count >= min_count
@@ -582,8 +603,9 @@ def test_nb_reduce_min_count_multithreaded(self, large_2d_array_with_nans):
582
603
583
604
def test_nb_reduce_mean_axis_none_multithreaded (self , large_2d_array ):
584
605
"""Test mean reduction with axis=None on large array (multi-threaded)."""
585
- result , count = nb_reduce ("mean" , large_2d_array , axis = None ,
586
- multi_threading = True )
606
+ result , count = nb_reduce (
607
+ "mean" , large_2d_array , axis = None , multi_threading = True
608
+ )
587
609
588
610
# Compare with numpy result
589
611
expected = np .mean (large_2d_array )
@@ -594,8 +616,7 @@ def test_nb_reduce_mean_axis_none_multithreaded(self, large_2d_array):
594
616
595
617
def test_nb_reduce_mean_axis_0_multithreaded (self , large_2d_array ):
596
618
"""Test mean reduction along axis 0 on large array (multi-threaded)."""
597
- result , count = nb_reduce ("mean" , large_2d_array , axis = 0 ,
598
- multi_threading = True )
619
+ result , count = nb_reduce ("mean" , large_2d_array , axis = 0 , multi_threading = True )
599
620
600
621
# Compare with numpy result
601
622
expected = np .mean (large_2d_array , axis = 0 )
@@ -606,8 +627,7 @@ def test_nb_reduce_mean_axis_0_multithreaded(self, large_2d_array):
606
627
607
628
def test_nb_reduce_mean_axis_1_multithreaded (self , large_2d_array ):
608
629
"""Test mean reduction along axis 1 on large array (multi-threaded)."""
609
- result , count = nb_reduce ("mean" , large_2d_array , axis = 1 ,
610
- multi_threading = True )
630
+ result , count = nb_reduce ("mean" , large_2d_array , axis = 1 , multi_threading = True )
611
631
612
632
# Compare with numpy result
613
633
expected = np .mean (large_2d_array , axis = 1 )
@@ -618,35 +638,38 @@ def test_nb_reduce_mean_axis_1_multithreaded(self, large_2d_array):
618
638
619
639
def test_nb_reduce_sum_square_axis_none_multithreaded (self , large_2d_array ):
620
640
"""Test sum_square reduction with axis=None on large array."""
621
- result , count = nb_reduce ("sum_square" , large_2d_array , axis = None ,
622
- multi_threading = True )
641
+ result , count = nb_reduce (
642
+ "sum_square" , large_2d_array , axis = None , multi_threading = True
643
+ )
623
644
624
645
# Compare with numpy result (sum of squares)
625
- expected = np .sum (large_2d_array ** 2 )
646
+ expected = np .sum (large_2d_array ** 2 )
626
647
expected_count = large_2d_array .size
627
648
628
649
tm .assert_almost_equal (result , expected , rtol = 1e-10 )
629
650
assert count == expected_count
630
651
631
652
def test_nb_reduce_sum_square_axis_0_multithreaded (self , large_2d_array ):
632
653
"""Test sum_square reduction along axis 0 on large array."""
633
- result , count = nb_reduce ("sum_square" , large_2d_array , axis = 0 ,
634
- multi_threading = True )
654
+ result , count = nb_reduce (
655
+ "sum_square" , large_2d_array , axis = 0 , multi_threading = True
656
+ )
635
657
636
658
# Compare with numpy result (sum of squares along axis 0)
637
- expected = np .sum (large_2d_array ** 2 , axis = 0 )
659
+ expected = np .sum (large_2d_array ** 2 , axis = 0 )
638
660
expected_count = np .full (large_2d_array .shape [1 ], large_2d_array .shape [0 ])
639
661
640
662
tm .assert_numpy_array_equal (result , expected )
641
663
tm .assert_numpy_array_equal (count , expected_count )
642
664
643
665
def test_nb_reduce_sum_square_axis_1_multithreaded (self , large_2d_array ):
644
666
"""Test sum_square reduction along axis 1 on large array."""
645
- result , count = nb_reduce ("sum_square" , large_2d_array , axis = 1 ,
646
- multi_threading = True )
667
+ result , count = nb_reduce (
668
+ "sum_square" , large_2d_array , axis = 1 , multi_threading = True
669
+ )
647
670
648
671
# Compare with numpy result (sum of squares along axis 1)
649
- expected = np .sum (large_2d_array ** 2 , axis = 1 )
672
+ expected = np .sum (large_2d_array ** 2 , axis = 1 )
650
673
expected_count = np .full (large_2d_array .shape [0 ], large_2d_array .shape [1 ])
651
674
652
675
np .testing .assert_array_almost_equal (result , expected )
@@ -687,8 +710,9 @@ def timedelta64_2d_array_with_nat(self):
687
710
688
711
def test_nb_reduce_timedelta64_sum_axis_none (self , timedelta64_2d_array ):
689
712
"""Test sum reduction on timedelta64 array with axis=None."""
690
- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = None ,
691
- multi_threading = True )
713
+ result , count = nb_reduce (
714
+ "sum" , timedelta64_2d_array , axis = None , multi_threading = True
715
+ )
692
716
693
717
# Compare with numpy result
694
718
expected = np .sum (timedelta64_2d_array )
@@ -699,42 +723,48 @@ def test_nb_reduce_timedelta64_sum_axis_none(self, timedelta64_2d_array):
699
723
700
724
def test_nb_reduce_timedelta64_sum_axis_0 (self , timedelta64_2d_array ):
701
725
"""Test sum reduction on timedelta64 array along axis 0."""
702
- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = 0 ,
703
- multi_threading = True )
726
+ result , count = nb_reduce (
727
+ "sum" , timedelta64_2d_array , axis = 0 , multi_threading = True
728
+ )
704
729
705
730
# Compare with numpy result
706
731
expected = np .sum (timedelta64_2d_array , axis = 0 )
707
- expected_count = np .full (timedelta64_2d_array .shape [1 ],
708
- timedelta64_2d_array .shape [0 ])
732
+ expected_count = np .full (
733
+ timedelta64_2d_array .shape [1 ], timedelta64_2d_array .shape [0 ]
734
+ )
709
735
710
736
tm .assert_numpy_array_equal (result , expected )
711
737
tm .assert_numpy_array_equal (count , expected_count )
712
738
713
739
def test_nb_reduce_timedelta64_sum_axis_1 (self , timedelta64_2d_array ):
714
740
"""Test sum reduction on timedelta64 array along axis 1."""
715
- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = 1 ,
716
- multi_threading = True )
741
+ result , count = nb_reduce (
742
+ "sum" , timedelta64_2d_array , axis = 1 , multi_threading = True
743
+ )
717
744
718
745
# Compare with numpy result
719
746
expected = np .sum (timedelta64_2d_array , axis = 1 )
720
- expected_count = np .full (timedelta64_2d_array .shape [0 ],
721
- timedelta64_2d_array .shape [1 ])
747
+ expected_count = np .full (
748
+ timedelta64_2d_array .shape [0 ], timedelta64_2d_array .shape [1 ]
749
+ )
722
750
723
751
tm .assert_numpy_array_equal (result , expected )
724
752
tm .assert_numpy_array_equal (count , expected_count )
725
753
726
754
def test_nb_reduce_timedelta64_min_max (self , timedelta64_2d_array ):
727
755
"""Test min/max reduction on timedelta64 array."""
728
756
# Test min
729
- result_min , count_min = nb_reduce ("min" , timedelta64_2d_array , axis = None ,
730
- multi_threading = True )
757
+ result_min , count_min = nb_reduce (
758
+ "min" , timedelta64_2d_array , axis = None , multi_threading = True
759
+ )
731
760
expected_min = np .min (timedelta64_2d_array )
732
761
assert result_min == expected_min
733
762
assert count_min == timedelta64_2d_array .size
734
763
735
764
# Test max
736
- result_max , count_max = nb_reduce ("max" , timedelta64_2d_array , axis = None ,
737
- multi_threading = True )
765
+ result_max , count_max = nb_reduce (
766
+ "max" , timedelta64_2d_array , axis = None , multi_threading = True
767
+ )
738
768
expected_max = np .max (timedelta64_2d_array )
739
769
assert result_max == expected_max
740
770
assert count_max == timedelta64_2d_array .size
@@ -743,8 +773,13 @@ def test_nb_reduce_timedelta64_with_nat_skipna_true(
743
773
self , timedelta64_2d_array_with_nat
744
774
):
745
775
"""Test reduction on timedelta64 array with NaT values, skipna=True."""
746
- result , count = nb_reduce ("sum" , timedelta64_2d_array_with_nat , axis = None ,
747
- skipna = True , multi_threading = True )
776
+ result , count = nb_reduce (
777
+ "sum" ,
778
+ timedelta64_2d_array_with_nat ,
779
+ axis = None ,
780
+ skipna = True ,
781
+ multi_threading = True ,
782
+ )
748
783
749
784
# Compare with numpy result
750
785
# For timedelta64 with NaT, we need to use nansum equivalent
@@ -759,21 +794,28 @@ def test_nb_reduce_timedelta64_with_nat_skipna_false(
759
794
self , timedelta64_2d_array_with_nat
760
795
):
761
796
"""Test reduction on timedelta64 array with NaT values, skipna=False."""
762
- result , count = nb_reduce ("sum" , timedelta64_2d_array_with_nat , axis = None ,
763
- skipna = False , multi_threading = True )
797
+ result , count = nb_reduce (
798
+ "sum" ,
799
+ timedelta64_2d_array_with_nat ,
800
+ axis = None ,
801
+ skipna = False ,
802
+ multi_threading = True ,
803
+ )
764
804
765
805
# When skipna=False and there are NaT values, result should be NaT
766
806
assert np .isnat (result )
767
807
768
808
def test_nb_reduce_timedelta64_mean_axis_0 (self , timedelta64_2d_array ):
769
809
"""Test mean reduction on timedelta64 array along axis 0."""
770
- result , count = nb_reduce ("mean" , timedelta64_2d_array , axis = 0 ,
771
- multi_threading = True )
810
+ result , count = nb_reduce (
811
+ "mean" , timedelta64_2d_array , axis = 0 , multi_threading = True
812
+ )
772
813
773
814
# Compare with numpy result
774
815
expected = np .mean (timedelta64_2d_array , axis = 0 )
775
- expected_count = np .full (timedelta64_2d_array .shape [1 ],
776
- timedelta64_2d_array .shape [0 ])
816
+ expected_count = np .full (
817
+ timedelta64_2d_array .shape [1 ], timedelta64_2d_array .shape [0 ]
818
+ )
777
819
778
820
tm .assert_numpy_array_equal (result , expected )
779
821
tm .assert_numpy_array_equal (count , expected_count )
0 commit comments