
Conversation

@spaits
Contributor

@spaits spaits commented Feb 28, 2025

Try to address issue #125354.

@llvmbot
Member

llvmbot commented Feb 28, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-risc-v

Author: Gábor Spaits (spaits)

Changes

Address issue #125354 for the RISC-V target and add the pattern (and x (sub 0, y)) -> (andn x y).

If this is accepted, I would like to do the same for ARM and X86 too.


Full diff: https://github.com/llvm/llvm-project/pull/129253.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZb.td (+1)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll (+234-95)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll (+27)
  • (modified) llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll (+73-21)
  • (modified) llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll (+27)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 2ce909c5d0e21..92d126fb8bcf9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -483,6 +483,7 @@ def invLogicImm : ComplexPattern<XLenVT, 1, "selectInvLogicImm", [], [], 0>;
 
 let Predicates = [HasStdExtZbbOrZbkb] in {
 def : Pat<(XLenVT (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (and GPR:$rs1, (sub 0, GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
 def : Pat<(XLenVT (or  GPR:$rs1, (not GPR:$rs2))), (ORN  GPR:$rs1, GPR:$rs2)>;
 def : Pat<(XLenVT (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>;
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index ababec16f7f8f..03a3e4b3ff742 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -22,6 +22,22 @@ define i32 @andn_i32(i32 %a, i32 %b) nounwind {
   ret i32 %and
 }
 
+define i32 @andn_i32_from_sub(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32_from_sub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i32_from_sub:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT:    ret
+  %neg = sub i32 0, %b
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
 define i64 @andn_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: andn_i64:
 ; RV32I:       # %bb.0:
@@ -41,6 +57,30 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
   ret i64 %and
 }
 
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64_from_sub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a4, a2
+; RV32I-NEXT:    snez a2, a2
+; RV32I-NEXT:    neg a3, a3
+; RV32I-NEXT:    sub a3, a3, a2
+; RV32I-NEXT:    and a0, a4, a0
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    snez a4, a2
+; RV32ZBB-ZBKB-NEXT:    neg a3, a3
+; RV32ZBB-ZBKB-NEXT:    sub a3, a3, a4
+; RV32ZBB-ZBKB-NEXT:    andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT:    and a1, a3, a1
+; RV32ZBB-ZBKB-NEXT:    ret
+  %neg = sub i64 0, %b
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
 define i32 @orn_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: orn_i32:
 ; RV32I:       # %bb.0:
@@ -136,53 +176,102 @@ define i32 @rol_i32(i32 %a, i32 %b) nounwind {
 declare i64 @llvm.fshl.i64(i64, i64, i64)
 
 define i64 @rol_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: rol_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    andi a6, a2, 63
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    bltu a6, a4, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    sll a7, a0, a6
-; CHECK-NEXT:    j .LBB7_3
-; CHECK-NEXT:  .LBB7_2:
-; CHECK-NEXT:    sll a3, a0, a2
-; CHECK-NEXT:    neg a5, a6
-; CHECK-NEXT:    srl a5, a0, a5
-; CHECK-NEXT:    sll a7, a1, a2
-; CHECK-NEXT:    or a7, a5, a7
-; CHECK-NEXT:  .LBB7_3:
-; CHECK-NEXT:    neg a5, a2
-; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:    beqz a6, .LBB7_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    mv a2, a7
-; CHECK-NEXT:  .LBB7_5:
-; CHECK-NEXT:    andi a6, a5, 63
-; CHECK-NEXT:    bltu a6, a4, .LBB7_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    srl a7, a1, a6
-; CHECK-NEXT:    bnez a6, .LBB7_8
-; CHECK-NEXT:    j .LBB7_9
-; CHECK-NEXT:  .LBB7_7:
-; CHECK-NEXT:    srl a7, a0, a5
-; CHECK-NEXT:    neg t0, a6
-; CHECK-NEXT:    sll t0, a1, t0
-; CHECK-NEXT:    or a7, a7, t0
-; CHECK-NEXT:    beqz a6, .LBB7_9
-; CHECK-NEXT:  .LBB7_8:
-; CHECK-NEXT:    mv a0, a7
-; CHECK-NEXT:  .LBB7_9:
-; CHECK-NEXT:    bltu a6, a4, .LBB7_11
-; CHECK-NEXT:  # %bb.10:
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    j .LBB7_12
-; CHECK-NEXT:  .LBB7_11:
-; CHECK-NEXT:    srl a1, a1, a5
-; CHECK-NEXT:  .LBB7_12:
-; CHECK-NEXT:    or a0, a3, a0
-; CHECK-NEXT:    or a1, a2, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: rol_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a6, a2, 63
+; RV32I-NEXT:    li a4, 32
+; RV32I-NEXT:    bltu a6, a4, .LBB9_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sll a7, a0, a6
+; RV32I-NEXT:    j .LBB9_3
+; RV32I-NEXT:  .LBB9_2:
+; RV32I-NEXT:    sll a3, a0, a2
+; RV32I-NEXT:    neg a5, a6
+; RV32I-NEXT:    srl a5, a0, a5
+; RV32I-NEXT:    sll a7, a1, a2
+; RV32I-NEXT:    or a7, a5, a7
+; RV32I-NEXT:  .LBB9_3:
+; RV32I-NEXT:    neg a5, a2
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:    beqz a6, .LBB9_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    mv a2, a7
+; RV32I-NEXT:  .LBB9_5:
+; RV32I-NEXT:    andi a6, a5, 63
+; RV32I-NEXT:    bltu a6, a4, .LBB9_7
+; RV32I-NEXT:  # %bb.6:
+; RV32I-NEXT:    srl a7, a1, a6
+; RV32I-NEXT:    bnez a6, .LBB9_8
+; RV32I-NEXT:    j .LBB9_9
+; RV32I-NEXT:  .LBB9_7:
+; RV32I-NEXT:    srl a7, a0, a5
+; RV32I-NEXT:    neg t0, a6
+; RV32I-NEXT:    sll t0, a1, t0
+; RV32I-NEXT:    or a7, a7, t0
+; RV32I-NEXT:    beqz a6, .LBB9_9
+; RV32I-NEXT:  .LBB9_8:
+; RV32I-NEXT:    mv a0, a7
+; RV32I-NEXT:  .LBB9_9:
+; RV32I-NEXT:    bltu a6, a4, .LBB9_11
+; RV32I-NEXT:  # %bb.10:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    j .LBB9_12
+; RV32I-NEXT:  .LBB9_11:
+; RV32I-NEXT:    srl a1, a1, a5
+; RV32I-NEXT:  .LBB9_12:
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: rol_i64:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    andi a6, a2, 63
+; RV32ZBB-ZBKB-NEXT:    li a4, 32
+; RV32ZBB-ZBKB-NEXT:    bltu a6, a4, .LBB9_2
+; RV32ZBB-ZBKB-NEXT:  # %bb.1:
+; RV32ZBB-ZBKB-NEXT:    li a3, 0
+; RV32ZBB-ZBKB-NEXT:    sll a7, a0, a6
+; RV32ZBB-ZBKB-NEXT:    j .LBB9_3
+; RV32ZBB-ZBKB-NEXT:  .LBB9_2:
+; RV32ZBB-ZBKB-NEXT:    sll a3, a0, a2
+; RV32ZBB-ZBKB-NEXT:    neg a5, a6
+; RV32ZBB-ZBKB-NEXT:    srl a5, a0, a5
+; RV32ZBB-ZBKB-NEXT:    sll a7, a1, a2
+; RV32ZBB-ZBKB-NEXT:    or a7, a5, a7
+; RV32ZBB-ZBKB-NEXT:  .LBB9_3:
+; RV32ZBB-ZBKB-NEXT:    li t0, 63
+; RV32ZBB-ZBKB-NEXT:    mv a5, a1
+; RV32ZBB-ZBKB-NEXT:    beqz a6, .LBB9_5
+; RV32ZBB-ZBKB-NEXT:  # %bb.4:
+; RV32ZBB-ZBKB-NEXT:    mv a5, a7
+; RV32ZBB-ZBKB-NEXT:  .LBB9_5:
+; RV32ZBB-ZBKB-NEXT:    andn a6, t0, a2
+; RV32ZBB-ZBKB-NEXT:    neg a2, a2
+; RV32ZBB-ZBKB-NEXT:    bltu a6, a4, .LBB9_7
+; RV32ZBB-ZBKB-NEXT:  # %bb.6:
+; RV32ZBB-ZBKB-NEXT:    srl a7, a1, a6
+; RV32ZBB-ZBKB-NEXT:    bnez a6, .LBB9_8
+; RV32ZBB-ZBKB-NEXT:    j .LBB9_9
+; RV32ZBB-ZBKB-NEXT:  .LBB9_7:
+; RV32ZBB-ZBKB-NEXT:    srl a7, a0, a2
+; RV32ZBB-ZBKB-NEXT:    neg t0, a6
+; RV32ZBB-ZBKB-NEXT:    sll t0, a1, t0
+; RV32ZBB-ZBKB-NEXT:    or a7, a7, t0
+; RV32ZBB-ZBKB-NEXT:    beqz a6, .LBB9_9
+; RV32ZBB-ZBKB-NEXT:  .LBB9_8:
+; RV32ZBB-ZBKB-NEXT:    mv a0, a7
+; RV32ZBB-ZBKB-NEXT:  .LBB9_9:
+; RV32ZBB-ZBKB-NEXT:    bltu a6, a4, .LBB9_11
+; RV32ZBB-ZBKB-NEXT:  # %bb.10:
+; RV32ZBB-ZBKB-NEXT:    li a1, 0
+; RV32ZBB-ZBKB-NEXT:    j .LBB9_12
+; RV32ZBB-ZBKB-NEXT:  .LBB9_11:
+; RV32ZBB-ZBKB-NEXT:    srl a1, a1, a2
+; RV32ZBB-ZBKB-NEXT:  .LBB9_12:
+; RV32ZBB-ZBKB-NEXT:    or a0, a3, a0
+; RV32ZBB-ZBKB-NEXT:    or a1, a5, a1
+; RV32ZBB-ZBKB-NEXT:    ret
   %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
 }
@@ -212,54 +301,104 @@ define i32 @ror_i32(i32 %a, i32 %b) nounwind {
 declare i64 @llvm.fshr.i64(i64, i64, i64)
 
 define i64 @ror_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: ror_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    andi a5, a2, 63
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    bltu a5, a4, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    srl a6, a1, a5
-; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:    bnez a5, .LBB9_3
-; CHECK-NEXT:    j .LBB9_4
-; CHECK-NEXT:  .LBB9_2:
-; CHECK-NEXT:    srl a3, a0, a2
-; CHECK-NEXT:    neg a6, a5
-; CHECK-NEXT:    sll a6, a1, a6
-; CHECK-NEXT:    or a6, a3, a6
-; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:    beqz a5, .LBB9_4
-; CHECK-NEXT:  .LBB9_3:
-; CHECK-NEXT:    mv a3, a6
-; CHECK-NEXT:  .LBB9_4:
-; CHECK-NEXT:    neg a6, a2
-; CHECK-NEXT:    bltu a5, a4, .LBB9_7
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    andi a5, a6, 63
-; CHECK-NEXT:    bgeu a5, a4, .LBB9_8
-; CHECK-NEXT:  .LBB9_6:
-; CHECK-NEXT:    sll a4, a0, a6
-; CHECK-NEXT:    neg a7, a5
-; CHECK-NEXT:    srl a0, a0, a7
-; CHECK-NEXT:    sll a6, a1, a6
-; CHECK-NEXT:    or a0, a0, a6
-; CHECK-NEXT:    bnez a5, .LBB9_9
-; CHECK-NEXT:    j .LBB9_10
-; CHECK-NEXT:  .LBB9_7:
-; CHECK-NEXT:    srl a2, a1, a2
-; CHECK-NEXT:    andi a5, a6, 63
-; CHECK-NEXT:    bltu a5, a4, .LBB9_6
-; CHECK-NEXT:  .LBB9_8:
-; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    sll a0, a0, a5
-; CHECK-NEXT:    beqz a5, .LBB9_10
-; CHECK-NEXT:  .LBB9_9:
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:  .LBB9_10:
-; CHECK-NEXT:    or a0, a3, a4
-; CHECK-NEXT:    or a1, a2, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: ror_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a5, a2, 63
+; RV32I-NEXT:    li a4, 32
+; RV32I-NEXT:    bltu a5, a4, .LBB11_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl a6, a1, a5
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    bnez a5, .LBB11_3
+; RV32I-NEXT:    j .LBB11_4
+; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:    srl a3, a0, a2
+; RV32I-NEXT:    neg a6, a5
+; RV32I-NEXT:    sll a6, a1, a6
+; RV32I-NEXT:    or a6, a3, a6
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    beqz a5, .LBB11_4
+; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:  .LBB11_4:
+; RV32I-NEXT:    neg a6, a2
+; RV32I-NEXT:    bltu a5, a4, .LBB11_7
+; RV32I-NEXT:  # %bb.5:
+; RV32I-NEXT:    li a2, 0
+; RV32I-NEXT:    andi a5, a6, 63
+; RV32I-NEXT:    bgeu a5, a4, .LBB11_8
+; RV32I-NEXT:  .LBB11_6:
+; RV32I-NEXT:    sll a4, a0, a6
+; RV32I-NEXT:    neg a7, a5
+; RV32I-NEXT:    srl a0, a0, a7
+; RV32I-NEXT:    sll a6, a1, a6
+; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    bnez a5, .LBB11_9
+; RV32I-NEXT:    j .LBB11_10
+; RV32I-NEXT:  .LBB11_7:
+; RV32I-NEXT:    srl a2, a1, a2
+; RV32I-NEXT:    andi a5, a6, 63
+; RV32I-NEXT:    bltu a5, a4, .LBB11_6
+; RV32I-NEXT:  .LBB11_8:
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    sll a0, a0, a5
+; RV32I-NEXT:    beqz a5, .LBB11_10
+; RV32I-NEXT:  .LBB11_9:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB11_10:
+; RV32I-NEXT:    or a0, a3, a4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: ror_i64:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    andi a4, a2, 63
+; RV32ZBB-ZBKB-NEXT:    li a5, 32
+; RV32ZBB-ZBKB-NEXT:    bltu a4, a5, .LBB11_2
+; RV32ZBB-ZBKB-NEXT:  # %bb.1:
+; RV32ZBB-ZBKB-NEXT:    srl a6, a1, a4
+; RV32ZBB-ZBKB-NEXT:    mv a3, a0
+; RV32ZBB-ZBKB-NEXT:    bnez a4, .LBB11_3
+; RV32ZBB-ZBKB-NEXT:    j .LBB11_4
+; RV32ZBB-ZBKB-NEXT:  .LBB11_2:
+; RV32ZBB-ZBKB-NEXT:    srl a3, a0, a2
+; RV32ZBB-ZBKB-NEXT:    neg a6, a4
+; RV32ZBB-ZBKB-NEXT:    sll a6, a1, a6
+; RV32ZBB-ZBKB-NEXT:    or a6, a3, a6
+; RV32ZBB-ZBKB-NEXT:    mv a3, a0
+; RV32ZBB-ZBKB-NEXT:    beqz a4, .LBB11_4
+; RV32ZBB-ZBKB-NEXT:  .LBB11_3:
+; RV32ZBB-ZBKB-NEXT:    mv a3, a6
+; RV32ZBB-ZBKB-NEXT:  .LBB11_4:
+; RV32ZBB-ZBKB-NEXT:    li a6, 63
+; RV32ZBB-ZBKB-NEXT:    bltu a4, a5, .LBB11_7
+; RV32ZBB-ZBKB-NEXT:  # %bb.5:
+; RV32ZBB-ZBKB-NEXT:    li a4, 0
+; RV32ZBB-ZBKB-NEXT:    andn a6, a6, a2
+; RV32ZBB-ZBKB-NEXT:    bgeu a6, a5, .LBB11_8
+; RV32ZBB-ZBKB-NEXT:  .LBB11_6:
+; RV32ZBB-ZBKB-NEXT:    neg a5, a2
+; RV32ZBB-ZBKB-NEXT:    neg a7, a6
+; RV32ZBB-ZBKB-NEXT:    sll a2, a0, a5
+; RV32ZBB-ZBKB-NEXT:    srl a0, a0, a7
+; RV32ZBB-ZBKB-NEXT:    sll a5, a1, a5
+; RV32ZBB-ZBKB-NEXT:    or a0, a0, a5
+; RV32ZBB-ZBKB-NEXT:    bnez a6, .LBB11_9
+; RV32ZBB-ZBKB-NEXT:    j .LBB11_10
+; RV32ZBB-ZBKB-NEXT:  .LBB11_7:
+; RV32ZBB-ZBKB-NEXT:    srl a4, a1, a2
+; RV32ZBB-ZBKB-NEXT:    andn a6, a6, a2
+; RV32ZBB-ZBKB-NEXT:    bltu a6, a5, .LBB11_6
+; RV32ZBB-ZBKB-NEXT:  .LBB11_8:
+; RV32ZBB-ZBKB-NEXT:    li a2, 0
+; RV32ZBB-ZBKB-NEXT:    sll a0, a0, a6
+; RV32ZBB-ZBKB-NEXT:    beqz a6, .LBB11_10
+; RV32ZBB-ZBKB-NEXT:  .LBB11_9:
+; RV32ZBB-ZBKB-NEXT:    mv a1, a0
+; RV32ZBB-ZBKB-NEXT:  .LBB11_10:
+; RV32ZBB-ZBKB-NEXT:    or a0, a3, a2
+; RV32ZBB-ZBKB-NEXT:    or a1, a4, a1
+; RV32ZBB-ZBKB-NEXT:    ret
   %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
index 79d08772e8853..842ef1b215cd1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
@@ -6,6 +6,17 @@
 ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBKB
 
+define signext i32 @andn_i32_from_sub(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: andn_i32_from_sub:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negw a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %neg = sub i32 0, %b
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
 define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: andn_i32:
 ; RV64I:       # %bb.0:
@@ -38,6 +49,22 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
   ret i64 %and
 }
 
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64_from_sub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = sub i64 0, %b
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
 define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: orn_i32:
 ; RV64I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index b6344f88cddaa..045d0939d0764 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -22,6 +22,22 @@ define i32 @andn_i32(i32 %a, i32 %b) nounwind {
   ret i32 %and
 }
 
+define i32 @andn_i32_from_sub(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32_from_sub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i32_from_sub:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT:    ret
+  %neg = sub i32 0, %b
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
 define i64 @andn_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: andn_i64:
 ; RV32I:       # %bb.0:
@@ -41,6 +57,29 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
   ret i64 %and
 }
 
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64_from_sub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    snez a4, a2
+; RV32I-NEXT:    neg a3, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a3, a3, a4
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    snez a4, a2
+; RV32ZBB-ZBKB-NEXT:    add a3, a3, a4
+; RV32ZBB-ZBKB-NEXT:    andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT:    andn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT:    ret
+  %neg = sub i64 0, %b
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
 define i32 @orn_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: orn_i32:
 ; RV32I:       # %bb.0:
@@ -141,15 +180,15 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    slli a5, a2, 26
 ; CHECK-NEXT:    srli a5, a5, 31
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:    bnez a5, .LBB7_2
+; CHECK-NEXT:    bnez a5, .LBB9_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:  .LBB9_2:
 ; CHECK-NEXT:    sll a4, a3, a2
-; CHECK-NEXT:    bnez a5, .LBB7_4
+; CHECK-NEXT:    bnez a5, .LBB9_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB7_4:
+; CHECK-NEXT:  .LBB9_4:
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    not a5, a2
 ; CHECK-NEXT:    sll a2, a0, a2
@@ -192,15 +231,15 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a5, a2, 32
 ; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:    beqz a5, .LBB9_2
+; CHECK-NEXT:    beqz a5, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:  .LBB9_2:
+; CHECK-NEXT:  .LBB11_2:
 ; CHECK-NEXT:    srl a4, a3, a2
-; CHECK-NEXT:    beqz a5, .LBB9_4
+; CHECK-NEXT:    beqz a5, .LBB11_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:  .LBB9_4:
+; CHECK-NEXT:  .LBB11_4:
 ; CHECK-NEXT:    slli a0, a1, 1
 ; CHECK-NEXT:    not a5, a2
 ; CHECK-NEXT:    srl a1, a1, a2
@@ -293,19 +332,32 @@ define i32 @not_shl_one_i32(i32 %x) {
 }
 
 define i64 @not_shl_one_i64(i64 %x) {
-; CHECK-LABEL: not_shl_one_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, -32
-; CHECK-NEXT:    li a2, 1
-; CHECK-NEXT:    slti a1, a1, 0
-; CHECK-NEXT:    sll a0, a2, a0
-; CHECK-NEXT:    neg a2, a1
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    and a2, a2, a0
-; CHECK-NEXT:    and a1, a1, a0
-; CHECK-NEXT:    not a0, a2
-; CHECK-NEXT:    not a1, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: not_shl_one_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a1, a0, -32
+; RV32I-NEXT:    li a2, 1
+; RV32I-NEXT:    slti a1, a1, 0
+; RV32I-NEXT:    sll a0, a2, a0
+; RV32I-NEXT:    neg a2, a1
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    and a2, a2, a0
+; RV32I-NEXT:    and a1, a1, a0
+; RV32I-NEXT:    not a0, a2
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBKB-LABEL: not_shl_one_i64:
+; RV32ZBB-ZBKB:       # %bb.0:
+; RV32ZBB-ZBKB-NEXT:    li a1, 1
+; RV32ZBB-ZBKB-NEXT:    addi a2, a0, -32
+; RV32ZBB-ZBKB-NEXT:    sll a0, a1, a0
+; RV32ZBB-ZBKB-NEXT:    slti a1, a2, 0
+; RV32ZBB-ZBKB-NEXT:    addi a2, a1, -1
+; RV32ZBB-ZBKB-NEXT:    andn a1, a0, a1
+; RV32ZBB-ZBKB-NEXT:    and a2, a2, a0
+; RV32ZBB-ZBKB-NEXT:    not a0, a1
+; RV32ZBB-ZBKB-NEXT:    not a1, a2
+; RV32ZBB-ZBKB-NEXT:    ret
   %1 = shl i64 1, %x
   %2 = xor i64 %1, -1
   ret i64 %2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index bf077364c9c7a..b4ed47d9c4fbc 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -22,6 +22,17 @@ define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
   ret i32 %and
 }
 
+define signext i32 @andn_i32_from_sub(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: andn_i32_from_sub:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negw a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %neg = sub i32 0, %b
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
 define i64 @andn_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: andn_i64:
 ; RV64I:       # %bb.0:
@@ -38,6 +49,22 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
   ret i64 %and
 }
 
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64_from_sub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = sub i64 0, %b
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
 define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: orn_i32:
 ; RV64I:       # %bb.0:

@spaits spaits marked this pull request as draft February 28, 2025 14:41
@spaits spaits marked this pull request as ready for review February 28, 2025 14:42
@spaits spaits changed the title from [RISCV][SelDAG] and x (sub 0, y) -> andn x y to [RISCV] and x (sub 0, y) -> andn x y Feb 28, 2025
@spaits
Contributor Author

spaits commented Feb 28, 2025

In this first attempt I have played it safe. I would be open to a more generic solution if you think that would be better.

Maybe use the target hook TargetLowering::hasAndNot and do the transformation if that returns true?
But even with that hook, we cannot be sure that the given target uses two's complement to represent negative numbers, and I have not found any target hook that exposes the negative-number representation. Probably I just haven't looked hard enough.

@jayfoad
Contributor

jayfoad commented Feb 28, 2025

and x (sub 0, y) -> andn x y

This seems completely wrong since they calculate different results. Perhaps you meant "and x (sub -1, y) -> andn x y"??
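
A minimal standalone sketch of why they differ (illustrative C++, not code from the PR): in two's complement, 0 - y equals ~y + 1, so the two ANDs disagree for almost any y.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t x = 0xFF, y = 1;
  uint8_t neg = static_cast<uint8_t>(0 - y);  // 0 - 1 == 0xFF
  uint8_t inv = static_cast<uint8_t>(~y);     // ~1    == 0xFE
  // and x, (sub 0, y) keeps bit 0; and x, (not y) clears it.
  std::printf("x & -y = %02X, x & ~y = %02X\n", x & neg, x & inv);
}
```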

@spaits
Contributor Author

spaits commented Feb 28, 2025

@jayfoad Yes, you are right. Sorry for the mistake, and thank you for noticing it.

@spaits spaits changed the title from [RISCV] and x (sub 0, y) -> andn x y to [RISCV] and x (sub -1, y) -> andn x y Feb 28, 2025
@spaits spaits requested a review from jayfoad February 28, 2025 15:30
@spaits
Contributor Author

spaits commented Feb 28, 2025

and x (sub 0, y) -> andn x y

This seems completely wrong since they calculate different results. Perhaps you meant "and x (sub -1, y) -> andn x y"??

This is the case for signed integers, I see that. But what if we have unsigned integers?
Maybe in that case we would still need 0, right?

@jayfoad
Contributor

jayfoad commented Feb 28, 2025

and x (sub 0, y) -> andn x y

This seems completely wrong since they calculate different results. Perhaps you meant "and x (sub -1, y) -> andn x y"??

This is the case for signed integers. I see that. What if we have unsigned integers?

Fixed-bitwidth two's complement add and subtract work exactly the same for signed and unsigned integers; that's why we don't have separate signed/unsigned opcodes for them (unlike udiv and sdiv, for example).
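
A minimal sketch of that equivalence (illustrative C++, not from the thread; values chosen to avoid signed overflow):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // 5 - 7 as signed is -2; as unsigned it wraps to 0xFFFFFFFE.
  // In two's complement both are the same 32-bit pattern, which is
  // why one subtract opcode serves both interpretations.
  int32_t  s = int32_t{5} - int32_t{7};
  uint32_t u = uint32_t{5} - uint32_t{7};
  assert(static_cast<uint32_t>(s) == u);
  assert(u == 0xFFFFFFFEu);
}
```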

@topperc
Collaborator

topperc commented Feb 28, 2025

Isn't sub -1, x canonicalized to xor x, -1?

@spaits
Contributor Author

spaits commented Feb 28, 2025

Isn't sub -1, x canonicalized to xor x, -1?

Currently, I think it is not.

@spaits
Contributor Author

spaits commented Feb 28, 2025

Isn't sub -1, x canonicalized to xor x, -1?

Furthermore, if the InstCombiner sees the xor version, it will replace it with sub. See https://godbolt.org/z/4ffxhPbz6 (InstCombinePass).
Maybe we should convert it back in SelectionDAG? But only for targets that support hasAndNot!

@spaits spaits requested a review from topperc February 28, 2025 16:30
@s-barannikov
Contributor

Isn't sub -1, x canonicalized to xor x, -1?

Currently, I think they are not.

But it should be: https://github.com/llvm/llvm-project/blob/3f63e1c834e000d4ea95d667ae224cc232927196/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L3969

@spaits spaits requested a review from s-barannikov February 28, 2025 16:46
@topperc
Collaborator

topperc commented Feb 28, 2025

The added tests already produce andn.

@spaits
Contributor Author

spaits commented Feb 28, 2025

The added tests already produce andn.

I get it now.
At first I did this incorrectly and matched sub 0, y, and that rightfully didn't work. After I fixed it to -1, it already worked.

Sorry for this.

After this I still don't understand why the scenario shown in the issue doesn't work.

Do you have any idea about that?

@topperc
Collaborator

topperc commented Feb 28, 2025

The added tests already produce andn.

I get it now. At first I did this incorrectly and matched sub 0, y, and that rightfully didn't work. After I fixed it to -1, it already worked.

Sorry for this.

After this I still don't understand why the scenario shown in the issue doesn't work.

Do you have any idea about that?

I think you have to match all 4 instructions.

The full pattern is (and (add (add a, b), -1), (sub 0, b)). The adds need to be reassociated to do (add b, -1) first. Only then can it be reused by an andn.
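
A minimal C++ sketch of that reassociation (illustrative only, not code from the PR; the function names are made up). Once t = b - 1 is formed first, it is shared between the add and the mask, and since 0 - b == ~(b - 1) in two's complement, the AND becomes an andn:

```cpp
#include <cassert>
#include <cstdint>

// Shape from the issue, as matched in the DAG:
// (and (add (add a, b), -1), (sub 0, b))
uint32_t before(uint32_t a, uint32_t b) {
  return ((a + b) - 1) & (0u - b);
}

// Reassociated: form t = b - 1 first, then reuse it.
uint32_t after(uint32_t a, uint32_t b) {
  uint32_t t = b - 1;     // (add b, -1), now shared
  return (a + t) & ~t;    // (and (add a, t), (not t)) -> andn
}

int main() {
  for (uint32_t a = 0; a < 64; ++a)
    for (uint32_t b = 0; b < 64; ++b)
      assert(before(a, b) == after(a, b));
}
```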

@spaits
Contributor Author

spaits commented Feb 28, 2025

And maybe it will be enough to work at the IR level: if the suggested IR transformations are done, we get andn generated: https://godbolt.org/z/z4fqW6Ejf (when the ticket was reported, that was not the case: https://godbolt.org/z/EEvhK9ovx).

Sorry for my negligence, and thank you for your time and patience.

@topperc
Collaborator

topperc commented Feb 28, 2025

And maybe it will be enough to work at the IR level: if the suggested IR transformations are done, we get andn generated: https://godbolt.org/z/z4fqW6Ejf (when the ticket was reported, that was not the case: https://godbolt.org/z/EEvhK9ovx).

Sorry for my negligence, and thank you for your time and patience.

I don't think this can be optimized in IR. InstCombine will nearly always try to reduce critical path depth if it doesn't increase the number of instructions. This change is only profitable if andn exists for the target, which InstCombine can't know. Generic DAGCombine will want to do the same.

Both InstCombine and DAGCombiner will want to turn (xor (add X, -1), -1) into (sub 0, X) regardless of how the xor is used or whether (add X, -1) has another user.
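
A minimal check of that identity (illustrative C++, not from the thread):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // ~(x - 1) == 0 - x in two's complement, so
  // (xor (add X, -1), -1) folds to the shorter (sub 0, X).
  for (uint32_t x = 0; x < 1024; ++x)
    assert(~(x - 1) == 0u - x);
}
```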

@cdevadas
Collaborator

cdevadas commented Mar 3, 2025

Address issue #125354 for the RISC-V target and add the pattern (and x (sub 0, y)) -> (andn x y).

The description should be fixed. It still uses 0 in the pattern instead of -1.

@spaits spaits marked this pull request as draft March 4, 2025 12:07