Skip to content

Commit 55d7531

Browse files
committed
Add (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
1 parent 1abcdc3 commit 55d7531

File tree

2 files changed

+249
-9
lines changed

2 files changed

+249
-9
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3951,18 +3951,36 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
39513951

39523952
// Similar to the previous rule, but this time targeting an expanded abs.
39533953
// (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3954-
// Note that this is applicable to both signed and unsigned min/max.
3954+
// as well as
3955+
// (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3956+
// Note that these two are applicable to both signed and unsigned min/max.
39553957
SDValue X;
39563958
SDValue S0;
3959+
auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
39573960
if (LegalOperations &&
3958-
sd_match(N1, m_OneUse(m_AnyOf(
3959-
m_SMax(m_Value(X),
3960-
m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0))),
3961-
m_UMax(m_Value(X), m_AllOf(m_Neg(m_Deferred(X)),
3962-
m_Value(S0))))))) {
3963-
unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
3964-
if (hasOperation(MinOpc, VT))
3965-
return DAG.getNode(MinOpc, DL, VT, X, S0);
3961+
sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3962+
m_UMax(m_Value(X), NegPat),
3963+
m_SMin(m_Value(X), NegPat),
3964+
m_UMin(m_Value(X), NegPat))))) {
3965+
unsigned NewOpc = 0;
3966+
switch (N1->getOpcode()) {
3967+
case ISD::SMAX:
3968+
NewOpc = ISD::SMIN;
3969+
break;
3970+
case ISD::UMAX:
3971+
NewOpc = ISD::UMIN;
3972+
break;
3973+
case ISD::SMIN:
3974+
NewOpc = ISD::SMAX;
3975+
break;
3976+
case ISD::UMIN:
3977+
NewOpc = ISD::UMAX;
3978+
break;
3979+
default:
3980+
llvm_unreachable("unrecognized opcode");
3981+
}
3982+
if (hasOperation(NewOpc, VT))
3983+
return DAG.getNode(NewOpc, DL, VT, X, S0);
39663984
}
39673985

39683986
// Fold neg(splat(neg(x)) -> splat(x)

llvm/test/CodeGen/RISCV/neg-abs.ll

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,225 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) {
480480
%r = sub i64 0, %t
481481
ret i64 %r
482482
}
483+
484+
define i32 @expanded_neg_inv_abs32(i32 %x) {
485+
; RV32I-LABEL: expanded_neg_inv_abs32:
486+
; RV32I: # %bb.0:
487+
; RV32I-NEXT: neg a1, a0
488+
; RV32I-NEXT: blt a1, a0, .LBB10_2
489+
; RV32I-NEXT: # %bb.1:
490+
; RV32I-NEXT: mv a1, a0
491+
; RV32I-NEXT: .LBB10_2:
492+
; RV32I-NEXT: neg a0, a1
493+
; RV32I-NEXT: ret
494+
;
495+
; RV32ZBB-LABEL: expanded_neg_inv_abs32:
496+
; RV32ZBB: # %bb.0:
497+
; RV32ZBB-NEXT: neg a1, a0
498+
; RV32ZBB-NEXT: max a0, a0, a1
499+
; RV32ZBB-NEXT: ret
500+
;
501+
; RV64I-LABEL: expanded_neg_inv_abs32:
502+
; RV64I: # %bb.0:
503+
; RV64I-NEXT: sext.w a1, a0
504+
; RV64I-NEXT: negw a0, a0
505+
; RV64I-NEXT: blt a0, a1, .LBB10_2
506+
; RV64I-NEXT: # %bb.1:
507+
; RV64I-NEXT: mv a0, a1
508+
; RV64I-NEXT: .LBB10_2:
509+
; RV64I-NEXT: negw a0, a0
510+
; RV64I-NEXT: ret
511+
;
512+
; RV64ZBB-LABEL: expanded_neg_inv_abs32:
513+
; RV64ZBB: # %bb.0:
514+
; RV64ZBB-NEXT: sext.w a1, a0
515+
; RV64ZBB-NEXT: negw a0, a0
516+
; RV64ZBB-NEXT: min a0, a0, a1
517+
; RV64ZBB-NEXT: negw a0, a0
518+
; RV64ZBB-NEXT: ret
519+
%n = sub i32 0, %x
520+
%t = call i32 @llvm.smin.i32(i32 %n, i32 %x)
521+
%r = sub i32 0, %t
522+
ret i32 %r
523+
}
524+
525+
define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) {
526+
; RV32I-LABEL: expanded_neg_inv_abs32_unsigned:
527+
; RV32I: # %bb.0:
528+
; RV32I-NEXT: neg a1, a0
529+
; RV32I-NEXT: bltu a1, a0, .LBB11_2
530+
; RV32I-NEXT: # %bb.1:
531+
; RV32I-NEXT: mv a1, a0
532+
; RV32I-NEXT: .LBB11_2:
533+
; RV32I-NEXT: neg a0, a1
534+
; RV32I-NEXT: ret
535+
;
536+
; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
537+
; RV32ZBB: # %bb.0:
538+
; RV32ZBB-NEXT: neg a1, a0
539+
; RV32ZBB-NEXT: maxu a0, a0, a1
540+
; RV32ZBB-NEXT: ret
541+
;
542+
; RV64I-LABEL: expanded_neg_inv_abs32_unsigned:
543+
; RV64I: # %bb.0:
544+
; RV64I-NEXT: sext.w a1, a0
545+
; RV64I-NEXT: negw a0, a0
546+
; RV64I-NEXT: bltu a0, a1, .LBB11_2
547+
; RV64I-NEXT: # %bb.1:
548+
; RV64I-NEXT: mv a0, a1
549+
; RV64I-NEXT: .LBB11_2:
550+
; RV64I-NEXT: negw a0, a0
551+
; RV64I-NEXT: ret
552+
;
553+
; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
554+
; RV64ZBB: # %bb.0:
555+
; RV64ZBB-NEXT: sext.w a1, a0
556+
; RV64ZBB-NEXT: negw a0, a0
557+
; RV64ZBB-NEXT: minu a0, a0, a1
558+
; RV64ZBB-NEXT: negw a0, a0
559+
; RV64ZBB-NEXT: ret
560+
%n = sub i32 0, %x
561+
%t = call i32 @llvm.umin.i32(i32 %n, i32 %x)
562+
%r = sub i32 0, %t
563+
ret i32 %r
564+
}
565+
566+
define i64 @expanded_neg_inv_abs64(i64 %x) {
567+
; RV32I-LABEL: expanded_neg_inv_abs64:
568+
; RV32I: # %bb.0:
569+
; RV32I-NEXT: snez a2, a0
570+
; RV32I-NEXT: neg a3, a1
571+
; RV32I-NEXT: sub a2, a3, a2
572+
; RV32I-NEXT: neg a3, a0
573+
; RV32I-NEXT: beq a2, a1, .LBB12_2
574+
; RV32I-NEXT: # %bb.1:
575+
; RV32I-NEXT: slt a4, a2, a1
576+
; RV32I-NEXT: beqz a4, .LBB12_3
577+
; RV32I-NEXT: j .LBB12_4
578+
; RV32I-NEXT: .LBB12_2:
579+
; RV32I-NEXT: sltu a4, a3, a0
580+
; RV32I-NEXT: bnez a4, .LBB12_4
581+
; RV32I-NEXT: .LBB12_3:
582+
; RV32I-NEXT: mv a2, a1
583+
; RV32I-NEXT: mv a3, a0
584+
; RV32I-NEXT: .LBB12_4:
585+
; RV32I-NEXT: snez a0, a3
586+
; RV32I-NEXT: add a0, a2, a0
587+
; RV32I-NEXT: neg a1, a0
588+
; RV32I-NEXT: neg a0, a3
589+
; RV32I-NEXT: ret
590+
;
591+
; RV32ZBB-LABEL: expanded_neg_inv_abs64:
592+
; RV32ZBB: # %bb.0:
593+
; RV32ZBB-NEXT: snez a2, a0
594+
; RV32ZBB-NEXT: neg a3, a1
595+
; RV32ZBB-NEXT: sub a2, a3, a2
596+
; RV32ZBB-NEXT: neg a3, a0
597+
; RV32ZBB-NEXT: beq a2, a1, .LBB12_2
598+
; RV32ZBB-NEXT: # %bb.1:
599+
; RV32ZBB-NEXT: slt a4, a2, a1
600+
; RV32ZBB-NEXT: beqz a4, .LBB12_3
601+
; RV32ZBB-NEXT: j .LBB12_4
602+
; RV32ZBB-NEXT: .LBB12_2:
603+
; RV32ZBB-NEXT: sltu a4, a3, a0
604+
; RV32ZBB-NEXT: bnez a4, .LBB12_4
605+
; RV32ZBB-NEXT: .LBB12_3:
606+
; RV32ZBB-NEXT: mv a2, a1
607+
; RV32ZBB-NEXT: mv a3, a0
608+
; RV32ZBB-NEXT: .LBB12_4:
609+
; RV32ZBB-NEXT: snez a0, a3
610+
; RV32ZBB-NEXT: add a0, a2, a0
611+
; RV32ZBB-NEXT: neg a1, a0
612+
; RV32ZBB-NEXT: neg a0, a3
613+
; RV32ZBB-NEXT: ret
614+
;
615+
; RV64I-LABEL: expanded_neg_inv_abs64:
616+
; RV64I: # %bb.0:
617+
; RV64I-NEXT: neg a1, a0
618+
; RV64I-NEXT: blt a1, a0, .LBB12_2
619+
; RV64I-NEXT: # %bb.1:
620+
; RV64I-NEXT: mv a1, a0
621+
; RV64I-NEXT: .LBB12_2:
622+
; RV64I-NEXT: neg a0, a1
623+
; RV64I-NEXT: ret
624+
;
625+
; RV64ZBB-LABEL: expanded_neg_inv_abs64:
626+
; RV64ZBB: # %bb.0:
627+
; RV64ZBB-NEXT: neg a1, a0
628+
; RV64ZBB-NEXT: max a0, a0, a1
629+
; RV64ZBB-NEXT: ret
630+
%n = sub i64 0, %x
631+
%t = call i64 @llvm.smin.i64(i64 %n, i64 %x)
632+
%r = sub i64 0, %t
633+
ret i64 %r
634+
}
635+
636+
define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) {
637+
; RV32I-LABEL: expanded_neg_inv_abs64_unsigned:
638+
; RV32I: # %bb.0:
639+
; RV32I-NEXT: snez a2, a0
640+
; RV32I-NEXT: neg a3, a1
641+
; RV32I-NEXT: sub a2, a3, a2
642+
; RV32I-NEXT: neg a3, a0
643+
; RV32I-NEXT: beq a2, a1, .LBB13_2
644+
; RV32I-NEXT: # %bb.1:
645+
; RV32I-NEXT: sltu a4, a2, a1
646+
; RV32I-NEXT: beqz a4, .LBB13_3
647+
; RV32I-NEXT: j .LBB13_4
648+
; RV32I-NEXT: .LBB13_2:
649+
; RV32I-NEXT: sltu a4, a3, a0
650+
; RV32I-NEXT: bnez a4, .LBB13_4
651+
; RV32I-NEXT: .LBB13_3:
652+
; RV32I-NEXT: mv a2, a1
653+
; RV32I-NEXT: mv a3, a0
654+
; RV32I-NEXT: .LBB13_4:
655+
; RV32I-NEXT: snez a0, a3
656+
; RV32I-NEXT: add a0, a2, a0
657+
; RV32I-NEXT: neg a1, a0
658+
; RV32I-NEXT: neg a0, a3
659+
; RV32I-NEXT: ret
660+
;
661+
; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
662+
; RV32ZBB: # %bb.0:
663+
; RV32ZBB-NEXT: snez a2, a0
664+
; RV32ZBB-NEXT: neg a3, a1
665+
; RV32ZBB-NEXT: sub a2, a3, a2
666+
; RV32ZBB-NEXT: neg a3, a0
667+
; RV32ZBB-NEXT: beq a2, a1, .LBB13_2
668+
; RV32ZBB-NEXT: # %bb.1:
669+
; RV32ZBB-NEXT: sltu a4, a2, a1
670+
; RV32ZBB-NEXT: beqz a4, .LBB13_3
671+
; RV32ZBB-NEXT: j .LBB13_4
672+
; RV32ZBB-NEXT: .LBB13_2:
673+
; RV32ZBB-NEXT: sltu a4, a3, a0
674+
; RV32ZBB-NEXT: bnez a4, .LBB13_4
675+
; RV32ZBB-NEXT: .LBB13_3:
676+
; RV32ZBB-NEXT: mv a2, a1
677+
; RV32ZBB-NEXT: mv a3, a0
678+
; RV32ZBB-NEXT: .LBB13_4:
679+
; RV32ZBB-NEXT: snez a0, a3
680+
; RV32ZBB-NEXT: add a0, a2, a0
681+
; RV32ZBB-NEXT: neg a1, a0
682+
; RV32ZBB-NEXT: neg a0, a3
683+
; RV32ZBB-NEXT: ret
684+
;
685+
; RV64I-LABEL: expanded_neg_inv_abs64_unsigned:
686+
; RV64I: # %bb.0:
687+
; RV64I-NEXT: neg a1, a0
688+
; RV64I-NEXT: bltu a1, a0, .LBB13_2
689+
; RV64I-NEXT: # %bb.1:
690+
; RV64I-NEXT: mv a1, a0
691+
; RV64I-NEXT: .LBB13_2:
692+
; RV64I-NEXT: neg a0, a1
693+
; RV64I-NEXT: ret
694+
;
695+
; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
696+
; RV64ZBB: # %bb.0:
697+
; RV64ZBB-NEXT: neg a1, a0
698+
; RV64ZBB-NEXT: maxu a0, a0, a1
699+
; RV64ZBB-NEXT: ret
700+
%n = sub i64 0, %x
701+
%t = call i64 @llvm.umin.i64(i64 %n, i64 %x)
702+
%r = sub i64 0, %t
703+
ret i64 %r
704+
}

0 commit comments

Comments
 (0)