@@ -387,33 +387,52 @@ def test_merge_series(scalars_dfs, merge_how):
387
387
assert_pandas_df_equal (bf_result , pd_result , ignore_order = True )
388
388
389
389
390
@pytest.mark.parametrize("right", [True, False])
def test_cut(scalars_dfs, right):
    """Compare ``bpd.cut`` against ``pd.cut`` for 5 integer bins, ``labels=False``.

    Runs once per value of ``right`` so that both settings of the ``right``
    keyword are passed through to ``pd.cut`` and ``bpd.cut``.

    Args:
        scalars_dfs: Fixture unpacked into ``(scalars_df, scalars_pandas_df)``;
            ``float64_col`` is read from each and handed to ``bpd.cut`` and
            ``pd.cut`` respectively.
        right: Forwarded unchanged as the ``right=`` keyword of both ``cut``
            calls.
    """
    scalars_df, scalars_pandas_df = scalars_dfs

    pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=False, right=right)
    bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=False, right=right)

    # make sure the result is a supported dtype
    assert bf_result.dtype == bpd.Int64Dtype()

    # Cast the pandas result to the same nullable integer dtype so the
    # series comparison below does not fail on dtype alone.
    pd_result = pd_result.astype("Int64")
    pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
400
407
401
408
402
- def test_cut_default_labels (scalars_dfs ):
409
+ @pytest .mark .parametrize (
410
+ ("right" ),
411
+ [
412
+ pytest .param (True ),
413
+ pytest .param (False ),
414
+ ],
415
+ )
416
+ def test_cut_default_labels (scalars_dfs , right ):
403
417
scalars_df , scalars_pandas_df = scalars_dfs
404
418
405
- pd_result = pd .cut (scalars_pandas_df ["float64_col" ], 5 )
406
- bf_result = bpd .cut (scalars_df ["float64_col" ], 5 ).to_pandas ()
419
+ pd_result = pd .cut (scalars_pandas_df ["float64_col" ], 5 , right = right )
420
+ bf_result = bpd .cut (scalars_df ["float64_col" ], 5 , right = right ).to_pandas ()
407
421
408
422
# Convert to match data format
423
+ pd_interval = pd_result .cat .categories [pd_result .cat .codes ]
424
+ if pd_interval .closed == "left" :
425
+ left_key = "left_inclusive"
426
+ right_key = "right_exclusive"
427
+ else :
428
+ left_key = "left_exclusive"
429
+ right_key = "right_inclusive"
409
430
pd_result_converted = pd .Series (
410
431
[
411
- {"left_exclusive" : interval .left , "right_inclusive" : interval .right }
432
+ {left_key : interval .left , right_key : interval .right }
412
433
if pd .notna (val )
413
434
else pd .NA
414
- for val , interval in zip (
415
- pd_result , pd_result .cat .categories [pd_result .cat .codes ]
416
- )
435
+ for val , interval in zip (pd_result , pd_interval )
417
436
],
418
437
name = pd_result .name ,
419
438
)
@@ -424,28 +443,35 @@ def test_cut_default_labels(scalars_dfs):
424
443
425
444
426
445
@pytest .mark .parametrize (
427
- ("breaks" ,),
446
+ ("breaks" , "right" ),
428
447
[
429
- ([0 , 5 , 10 , 15 , 20 , 100 , 1000 ],), # ints
430
- ([0.5 , 10.5 , 15.5 , 20.5 , 100.5 , 1000.5 ],), # floats
431
- ([0 , 5 , 10.5 , 15.5 , 20 , 100 , 1000.5 ],), # mixed
448
+ pytest .param ([0 , 5 , 10 , 15 , 20 , 100 , 1000 ], True , id = "int_right" ),
449
+ pytest .param ([0 , 5 , 10 , 15 , 20 , 100 , 1000 ], False , id = "int_left" ),
450
+ pytest .param ([0.5 , 10.5 , 15.5 , 20.5 , 100.5 , 1000.5 ], False , id = "float_left" ),
451
+ pytest .param ([0 , 5 , 10.5 , 15.5 , 20 , 100 , 1000.5 ], True , id = "mixed_right" ),
432
452
],
433
453
)
434
- def test_cut_numeric_breaks (scalars_dfs , breaks ):
454
+ def test_cut_numeric_breaks (scalars_dfs , breaks , right ):
435
455
scalars_df , scalars_pandas_df = scalars_dfs
436
456
437
- pd_result = pd .cut (scalars_pandas_df ["float64_col" ], breaks )
438
- bf_result = bpd .cut (scalars_df ["float64_col" ], breaks ).to_pandas ()
457
+ pd_result = pd .cut (scalars_pandas_df ["float64_col" ], breaks , right = right )
458
+ bf_result = bpd .cut (scalars_df ["float64_col" ], breaks , right = right ).to_pandas ()
439
459
440
460
# Convert to match data format
461
+ pd_interval = pd_result .cat .categories [pd_result .cat .codes ]
462
+ if pd_interval .closed == "left" :
463
+ left_key = "left_inclusive"
464
+ right_key = "right_exclusive"
465
+ else :
466
+ left_key = "left_exclusive"
467
+ right_key = "right_inclusive"
468
+
441
469
pd_result_converted = pd .Series (
442
470
[
443
- {"left_exclusive" : interval .left , "right_inclusive" : interval .right }
471
+ {left_key : interval .left , right_key : interval .right }
444
472
if pd .notna (val )
445
473
else pd .NA
446
- for val , interval in zip (
447
- pd_result , pd_result .cat .categories [pd_result .cat .codes ]
448
- )
474
+ for val , interval in zip (pd_result , pd_interval )
449
475
],
450
476
name = pd_result .name ,
451
477
)
@@ -476,29 +502,47 @@ def test_cut_errors(scalars_dfs, bins):
476
502
477
503
478
504
@pytest .mark .parametrize (
479
- ("bins" ,),
505
+ ("bins" , "right" ),
480
506
[
481
- ([(- 5 , 2 ), (2 , 3 ), (- 3000 , - 10 )],),
482
- (pd .IntervalIndex .from_tuples ([(1 , 2 ), (2 , 3 ), (4 , 5 )]),),
507
+ pytest .param ([(- 5 , 2 ), (2 , 3 ), (- 3000 , - 10 )], True , id = "tuple_right" ),
508
+ pytest .param ([(- 5 , 2 ), (2 , 3 ), (- 3000 , - 10 )], False , id = "tuple_left" ),
509
+ pytest .param (
510
+ pd .IntervalIndex .from_tuples ([(1 , 2 ), (2 , 3 ), (4 , 5 )]),
511
+ True ,
512
+ id = "interval_right" ,
513
+ ),
514
+ pytest .param (
515
+ pd .IntervalIndex .from_tuples ([(1 , 2 ), (2 , 3 ), (4 , 5 )]),
516
+ False ,
517
+ id = "interval_left" ,
518
+ ),
483
519
],
484
520
)
485
- def test_cut_with_interval (scalars_dfs , bins ):
521
+ def test_cut_with_interval (scalars_dfs , bins , right ):
486
522
scalars_df , scalars_pandas_df = scalars_dfs
487
- bf_result = bpd .cut (scalars_df ["int64_too" ], bins , labels = False ).to_pandas ()
523
+ bf_result = bpd .cut (
524
+ scalars_df ["int64_too" ], bins , labels = False , right = right
525
+ ).to_pandas ()
488
526
489
527
if isinstance (bins , list ):
490
528
bins = pd .IntervalIndex .from_tuples (bins )
491
- pd_result = pd .cut (scalars_pandas_df ["int64_too" ], bins , labels = False )
529
+ pd_result = pd .cut (scalars_pandas_df ["int64_too" ], bins , labels = False , right = right )
492
530
493
531
# Convert to match data format
532
+ pd_interval = pd_result .cat .categories [pd_result .cat .codes ]
533
+ if pd_interval .closed == "left" :
534
+ left_key = "left_inclusive"
535
+ right_key = "right_exclusive"
536
+ else :
537
+ left_key = "left_exclusive"
538
+ right_key = "right_inclusive"
539
+
494
540
pd_result_converted = pd .Series (
495
541
[
496
- {"left_exclusive" : interval .left , "right_inclusive" : interval .right }
542
+ {left_key : interval .left , right_key : interval .right }
497
543
if pd .notna (val )
498
544
else pd .NA
499
- for val , interval in zip (
500
- pd_result , pd_result .cat .categories [pd_result .cat .codes ]
501
- )
545
+ for val , interval in zip (pd_result , pd_interval )
502
546
],
503
547
name = pd_result .name ,
504
548
)
0 commit comments