[RISCV] and x (sub -1, y) -> andn x y #129253
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v

Author: Gábor Spaits (spaits)

Changes: Address issue #125354 for the RISC-V target and add a pattern for it. If this is accepted, I would like to do the same for ARM and X86 too.

Full diff: https://github.com/llvm/llvm-project/pull/129253.diff

5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 2ce909c5d0e21..92d126fb8bcf9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -483,6 +483,7 @@ def invLogicImm : ComplexPattern<XLenVT, 1, "selectInvLogicImm", [], [], 0>;
let Predicates = [HasStdExtZbbOrZbkb] in {
def : Pat<(XLenVT (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (and GPR:$rs1, (sub 0, GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
def : Pat<(XLenVT (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>;
def : Pat<(XLenVT (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index ababec16f7f8f..03a3e4b3ff742 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -22,6 +22,22 @@ define i32 @andn_i32(i32 %a, i32 %b) nounwind {
ret i32 %and
}
+define i32 @andn_i32_from_sub(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32_from_sub:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i32_from_sub:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: ret
+ %neg = sub i32 0, %b
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
define i64 @andn_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: andn_i64:
; RV32I: # %bb.0:
@@ -41,6 +57,30 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
ret i64 %and
}
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64_from_sub:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: neg a3, a3
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: and a0, a4, a0
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: snez a4, a2
+; RV32ZBB-ZBKB-NEXT: neg a3, a3
+; RV32ZBB-ZBKB-NEXT: sub a3, a3, a4
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: and a1, a3, a1
+; RV32ZBB-ZBKB-NEXT: ret
+ %neg = sub i64 0, %b
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
define i32 @orn_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: orn_i32:
; RV32I: # %bb.0:
@@ -136,53 +176,102 @@ define i32 @rol_i32(i32 %a, i32 %b) nounwind {
declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: rol_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi a6, a2, 63
-; CHECK-NEXT: li a4, 32
-; CHECK-NEXT: bltu a6, a4, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: sll a7, a0, a6
-; CHECK-NEXT: j .LBB7_3
-; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: sll a3, a0, a2
-; CHECK-NEXT: neg a5, a6
-; CHECK-NEXT: srl a5, a0, a5
-; CHECK-NEXT: sll a7, a1, a2
-; CHECK-NEXT: or a7, a5, a7
-; CHECK-NEXT: .LBB7_3:
-; CHECK-NEXT: neg a5, a2
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: beqz a6, .LBB7_5
-; CHECK-NEXT: # %bb.4:
-; CHECK-NEXT: mv a2, a7
-; CHECK-NEXT: .LBB7_5:
-; CHECK-NEXT: andi a6, a5, 63
-; CHECK-NEXT: bltu a6, a4, .LBB7_7
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: srl a7, a1, a6
-; CHECK-NEXT: bnez a6, .LBB7_8
-; CHECK-NEXT: j .LBB7_9
-; CHECK-NEXT: .LBB7_7:
-; CHECK-NEXT: srl a7, a0, a5
-; CHECK-NEXT: neg t0, a6
-; CHECK-NEXT: sll t0, a1, t0
-; CHECK-NEXT: or a7, a7, t0
-; CHECK-NEXT: beqz a6, .LBB7_9
-; CHECK-NEXT: .LBB7_8:
-; CHECK-NEXT: mv a0, a7
-; CHECK-NEXT: .LBB7_9:
-; CHECK-NEXT: bltu a6, a4, .LBB7_11
-; CHECK-NEXT: # %bb.10:
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: j .LBB7_12
-; CHECK-NEXT: .LBB7_11:
-; CHECK-NEXT: srl a1, a1, a5
-; CHECK-NEXT: .LBB7_12:
-; CHECK-NEXT: or a0, a3, a0
-; CHECK-NEXT: or a1, a2, a1
-; CHECK-NEXT: ret
+; RV32I-LABEL: rol_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a6, a2, 63
+; RV32I-NEXT: li a4, 32
+; RV32I-NEXT: bltu a6, a4, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: sll a7, a0, a6
+; RV32I-NEXT: j .LBB9_3
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: sll a3, a0, a2
+; RV32I-NEXT: neg a5, a6
+; RV32I-NEXT: srl a5, a0, a5
+; RV32I-NEXT: sll a7, a1, a2
+; RV32I-NEXT: or a7, a5, a7
+; RV32I-NEXT: .LBB9_3:
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: beqz a6, .LBB9_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: mv a2, a7
+; RV32I-NEXT: .LBB9_5:
+; RV32I-NEXT: andi a6, a5, 63
+; RV32I-NEXT: bltu a6, a4, .LBB9_7
+; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: srl a7, a1, a6
+; RV32I-NEXT: bnez a6, .LBB9_8
+; RV32I-NEXT: j .LBB9_9
+; RV32I-NEXT: .LBB9_7:
+; RV32I-NEXT: srl a7, a0, a5
+; RV32I-NEXT: neg t0, a6
+; RV32I-NEXT: sll t0, a1, t0
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: beqz a6, .LBB9_9
+; RV32I-NEXT: .LBB9_8:
+; RV32I-NEXT: mv a0, a7
+; RV32I-NEXT: .LBB9_9:
+; RV32I-NEXT: bltu a6, a4, .LBB9_11
+; RV32I-NEXT: # %bb.10:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: j .LBB9_12
+; RV32I-NEXT: .LBB9_11:
+; RV32I-NEXT: srl a1, a1, a5
+; RV32I-NEXT: .LBB9_12:
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: rol_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: andi a6, a2, 63
+; RV32ZBB-ZBKB-NEXT: li a4, 32
+; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1:
+; RV32ZBB-ZBKB-NEXT: li a3, 0
+; RV32ZBB-ZBKB-NEXT: sll a7, a0, a6
+; RV32ZBB-ZBKB-NEXT: j .LBB9_3
+; RV32ZBB-ZBKB-NEXT: .LBB9_2:
+; RV32ZBB-ZBKB-NEXT: sll a3, a0, a2
+; RV32ZBB-ZBKB-NEXT: neg a5, a6
+; RV32ZBB-ZBKB-NEXT: srl a5, a0, a5
+; RV32ZBB-ZBKB-NEXT: sll a7, a1, a2
+; RV32ZBB-ZBKB-NEXT: or a7, a5, a7
+; RV32ZBB-ZBKB-NEXT: .LBB9_3:
+; RV32ZBB-ZBKB-NEXT: li t0, 63
+; RV32ZBB-ZBKB-NEXT: mv a5, a1
+; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB9_5
+; RV32ZBB-ZBKB-NEXT: # %bb.4:
+; RV32ZBB-ZBKB-NEXT: mv a5, a7
+; RV32ZBB-ZBKB-NEXT: .LBB9_5:
+; RV32ZBB-ZBKB-NEXT: andn a6, t0, a2
+; RV32ZBB-ZBKB-NEXT: neg a2, a2
+; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_7
+; RV32ZBB-ZBKB-NEXT: # %bb.6:
+; RV32ZBB-ZBKB-NEXT: srl a7, a1, a6
+; RV32ZBB-ZBKB-NEXT: bnez a6, .LBB9_8
+; RV32ZBB-ZBKB-NEXT: j .LBB9_9
+; RV32ZBB-ZBKB-NEXT: .LBB9_7:
+; RV32ZBB-ZBKB-NEXT: srl a7, a0, a2
+; RV32ZBB-ZBKB-NEXT: neg t0, a6
+; RV32ZBB-ZBKB-NEXT: sll t0, a1, t0
+; RV32ZBB-ZBKB-NEXT: or a7, a7, t0
+; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB9_9
+; RV32ZBB-ZBKB-NEXT: .LBB9_8:
+; RV32ZBB-ZBKB-NEXT: mv a0, a7
+; RV32ZBB-ZBKB-NEXT: .LBB9_9:
+; RV32ZBB-ZBKB-NEXT: bltu a6, a4, .LBB9_11
+; RV32ZBB-ZBKB-NEXT: # %bb.10:
+; RV32ZBB-ZBKB-NEXT: li a1, 0
+; RV32ZBB-ZBKB-NEXT: j .LBB9_12
+; RV32ZBB-ZBKB-NEXT: .LBB9_11:
+; RV32ZBB-ZBKB-NEXT: srl a1, a1, a2
+; RV32ZBB-ZBKB-NEXT: .LBB9_12:
+; RV32ZBB-ZBKB-NEXT: or a0, a3, a0
+; RV32ZBB-ZBKB-NEXT: or a1, a5, a1
+; RV32ZBB-ZBKB-NEXT: ret
%or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
ret i64 %or
}
@@ -212,54 +301,104 @@ define i32 @ror_i32(i32 %a, i32 %b) nounwind {
declare i64 @llvm.fshr.i64(i64, i64, i64)
define i64 @ror_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: ror_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi a5, a2, 63
-; CHECK-NEXT: li a4, 32
-; CHECK-NEXT: bltu a5, a4, .LBB9_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: srl a6, a1, a5
-; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: bnez a5, .LBB9_3
-; CHECK-NEXT: j .LBB9_4
-; CHECK-NEXT: .LBB9_2:
-; CHECK-NEXT: srl a3, a0, a2
-; CHECK-NEXT: neg a6, a5
-; CHECK-NEXT: sll a6, a1, a6
-; CHECK-NEXT: or a6, a3, a6
-; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: beqz a5, .LBB9_4
-; CHECK-NEXT: .LBB9_3:
-; CHECK-NEXT: mv a3, a6
-; CHECK-NEXT: .LBB9_4:
-; CHECK-NEXT: neg a6, a2
-; CHECK-NEXT: bltu a5, a4, .LBB9_7
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: andi a5, a6, 63
-; CHECK-NEXT: bgeu a5, a4, .LBB9_8
-; CHECK-NEXT: .LBB9_6:
-; CHECK-NEXT: sll a4, a0, a6
-; CHECK-NEXT: neg a7, a5
-; CHECK-NEXT: srl a0, a0, a7
-; CHECK-NEXT: sll a6, a1, a6
-; CHECK-NEXT: or a0, a0, a6
-; CHECK-NEXT: bnez a5, .LBB9_9
-; CHECK-NEXT: j .LBB9_10
-; CHECK-NEXT: .LBB9_7:
-; CHECK-NEXT: srl a2, a1, a2
-; CHECK-NEXT: andi a5, a6, 63
-; CHECK-NEXT: bltu a5, a4, .LBB9_6
-; CHECK-NEXT: .LBB9_8:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: sll a0, a0, a5
-; CHECK-NEXT: beqz a5, .LBB9_10
-; CHECK-NEXT: .LBB9_9:
-; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: .LBB9_10:
-; CHECK-NEXT: or a0, a3, a4
-; CHECK-NEXT: or a1, a2, a1
-; CHECK-NEXT: ret
+; RV32I-LABEL: ror_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a5, a2, 63
+; RV32I-NEXT: li a4, 32
+; RV32I-NEXT: bltu a5, a4, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srl a6, a1, a5
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: bnez a5, .LBB11_3
+; RV32I-NEXT: j .LBB11_4
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: neg a6, a5
+; RV32I-NEXT: sll a6, a1, a6
+; RV32I-NEXT: or a6, a3, a6
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: beqz a5, .LBB11_4
+; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: mv a3, a6
+; RV32I-NEXT: .LBB11_4:
+; RV32I-NEXT: neg a6, a2
+; RV32I-NEXT: bltu a5, a4, .LBB11_7
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: andi a5, a6, 63
+; RV32I-NEXT: bgeu a5, a4, .LBB11_8
+; RV32I-NEXT: .LBB11_6:
+; RV32I-NEXT: sll a4, a0, a6
+; RV32I-NEXT: neg a7, a5
+; RV32I-NEXT: srl a0, a0, a7
+; RV32I-NEXT: sll a6, a1, a6
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: bnez a5, .LBB11_9
+; RV32I-NEXT: j .LBB11_10
+; RV32I-NEXT: .LBB11_7:
+; RV32I-NEXT: srl a2, a1, a2
+; RV32I-NEXT: andi a5, a6, 63
+; RV32I-NEXT: bltu a5, a4, .LBB11_6
+; RV32I-NEXT: .LBB11_8:
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: sll a0, a0, a5
+; RV32I-NEXT: beqz a5, .LBB11_10
+; RV32I-NEXT: .LBB11_9:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB11_10:
+; RV32I-NEXT: or a0, a3, a4
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: ror_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: andi a4, a2, 63
+; RV32ZBB-ZBKB-NEXT: li a5, 32
+; RV32ZBB-ZBKB-NEXT: bltu a4, a5, .LBB11_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1:
+; RV32ZBB-ZBKB-NEXT: srl a6, a1, a4
+; RV32ZBB-ZBKB-NEXT: mv a3, a0
+; RV32ZBB-ZBKB-NEXT: bnez a4, .LBB11_3
+; RV32ZBB-ZBKB-NEXT: j .LBB11_4
+; RV32ZBB-ZBKB-NEXT: .LBB11_2:
+; RV32ZBB-ZBKB-NEXT: srl a3, a0, a2
+; RV32ZBB-ZBKB-NEXT: neg a6, a4
+; RV32ZBB-ZBKB-NEXT: sll a6, a1, a6
+; RV32ZBB-ZBKB-NEXT: or a6, a3, a6
+; RV32ZBB-ZBKB-NEXT: mv a3, a0
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB11_4
+; RV32ZBB-ZBKB-NEXT: .LBB11_3:
+; RV32ZBB-ZBKB-NEXT: mv a3, a6
+; RV32ZBB-ZBKB-NEXT: .LBB11_4:
+; RV32ZBB-ZBKB-NEXT: li a6, 63
+; RV32ZBB-ZBKB-NEXT: bltu a4, a5, .LBB11_7
+; RV32ZBB-ZBKB-NEXT: # %bb.5:
+; RV32ZBB-ZBKB-NEXT: li a4, 0
+; RV32ZBB-ZBKB-NEXT: andn a6, a6, a2
+; RV32ZBB-ZBKB-NEXT: bgeu a6, a5, .LBB11_8
+; RV32ZBB-ZBKB-NEXT: .LBB11_6:
+; RV32ZBB-ZBKB-NEXT: neg a5, a2
+; RV32ZBB-ZBKB-NEXT: neg a7, a6
+; RV32ZBB-ZBKB-NEXT: sll a2, a0, a5
+; RV32ZBB-ZBKB-NEXT: srl a0, a0, a7
+; RV32ZBB-ZBKB-NEXT: sll a5, a1, a5
+; RV32ZBB-ZBKB-NEXT: or a0, a0, a5
+; RV32ZBB-ZBKB-NEXT: bnez a6, .LBB11_9
+; RV32ZBB-ZBKB-NEXT: j .LBB11_10
+; RV32ZBB-ZBKB-NEXT: .LBB11_7:
+; RV32ZBB-ZBKB-NEXT: srl a4, a1, a2
+; RV32ZBB-ZBKB-NEXT: andn a6, a6, a2
+; RV32ZBB-ZBKB-NEXT: bltu a6, a5, .LBB11_6
+; RV32ZBB-ZBKB-NEXT: .LBB11_8:
+; RV32ZBB-ZBKB-NEXT: li a2, 0
+; RV32ZBB-ZBKB-NEXT: sll a0, a0, a6
+; RV32ZBB-ZBKB-NEXT: beqz a6, .LBB11_10
+; RV32ZBB-ZBKB-NEXT: .LBB11_9:
+; RV32ZBB-ZBKB-NEXT: mv a1, a0
+; RV32ZBB-ZBKB-NEXT: .LBB11_10:
+; RV32ZBB-ZBKB-NEXT: or a0, a3, a2
+; RV32ZBB-ZBKB-NEXT: or a1, a4, a1
+; RV32ZBB-ZBKB-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
ret i64 %or
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
index 79d08772e8853..842ef1b215cd1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
@@ -6,6 +6,17 @@
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBKB
+define signext i32 @andn_i32_from_sub(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: andn_i32_from_sub:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negw a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: ret
+ %neg = sub i32 0, %b
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: andn_i32:
; RV64I: # %bb.0:
@@ -38,6 +49,22 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
ret i64 %and
}
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64_from_sub:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: ret
+ %neg = sub i64 0, %b
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: orn_i32:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index b6344f88cddaa..045d0939d0764 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -22,6 +22,22 @@ define i32 @andn_i32(i32 %a, i32 %b) nounwind {
ret i32 %and
}
+define i32 @andn_i32_from_sub(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32_from_sub:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i32_from_sub:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: ret
+ %neg = sub i32 0, %b
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
define i64 @andn_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: andn_i64:
; RV32I: # %bb.0:
@@ -41,6 +57,29 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
ret i64 %and
}
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64_from_sub:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a4, a2
+; RV32I-NEXT: neg a3, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: snez a4, a2
+; RV32ZBB-ZBKB-NEXT: add a3, a3, a4
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: andn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT: ret
+ %neg = sub i64 0, %b
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
define i32 @orn_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: orn_i32:
; RV32I: # %bb.0:
@@ -141,15 +180,15 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: slli a5, a2, 26
; CHECK-NEXT: srli a5, a5, 31
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bnez a5, .LBB7_2
+; CHECK-NEXT: bnez a5, .LBB9_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: sll a4, a3, a2
-; CHECK-NEXT: bnez a5, .LBB7_4
+; CHECK-NEXT: bnez a5, .LBB9_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: .LBB9_4:
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: not a5, a2
; CHECK-NEXT: sll a2, a0, a2
@@ -192,15 +231,15 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a5, a2, 32
; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: beqz a5, .LBB9_2
+; CHECK-NEXT: beqz a5, .LBB11_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: .LBB11_2:
; CHECK-NEXT: srl a4, a3, a2
-; CHECK-NEXT: beqz a5, .LBB9_4
+; CHECK-NEXT: beqz a5, .LBB11_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: .LBB11_4:
; CHECK-NEXT: slli a0, a1, 1
; CHECK-NEXT: not a5, a2
; CHECK-NEXT: srl a1, a1, a2
@@ -293,19 +332,32 @@ define i32 @not_shl_one_i32(i32 %x) {
}
define i64 @not_shl_one_i64(i64 %x) {
-; CHECK-LABEL: not_shl_one_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, -32
-; CHECK-NEXT: li a2, 1
-; CHECK-NEXT: slti a1, a1, 0
-; CHECK-NEXT: sll a0, a2, a0
-; CHECK-NEXT: neg a2, a1
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a2, a2, a0
-; CHECK-NEXT: and a1, a1, a0
-; CHECK-NEXT: not a0, a2
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: ret
+; RV32I-LABEL: not_shl_one_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a1, a0, -32
+; RV32I-NEXT: li a2, 1
+; RV32I-NEXT: slti a1, a1, 0
+; RV32I-NEXT: sll a0, a2, a0
+; RV32I-NEXT: neg a2, a1
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a2, a2, a0
+; RV32I-NEXT: and a1, a1, a0
+; RV32I-NEXT: not a0, a2
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: not_shl_one_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: li a1, 1
+; RV32ZBB-ZBKB-NEXT: addi a2, a0, -32
+; RV32ZBB-ZBKB-NEXT: sll a0, a1, a0
+; RV32ZBB-ZBKB-NEXT: slti a1, a2, 0
+; RV32ZBB-ZBKB-NEXT: addi a2, a1, -1
+; RV32ZBB-ZBKB-NEXT: andn a1, a0, a1
+; RV32ZBB-ZBKB-NEXT: and a2, a2, a0
+; RV32ZBB-ZBKB-NEXT: not a0, a1
+; RV32ZBB-ZBKB-NEXT: not a1, a2
+; RV32ZBB-ZBKB-NEXT: ret
%1 = shl i64 1, %x
%2 = xor i64 %1, -1
ret i64 %2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index bf077364c9c7a..b4ed47d9c4fbc 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -22,6 +22,17 @@ define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
ret i32 %and
}
+define signext i32 @andn_i32_from_sub(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: andn_i32_from_sub:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negw a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: ret
+ %neg = sub i32 0, %b
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
define i64 @andn_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: andn_i64:
; RV64I: # %bb.0:
@@ -38,6 +49,22 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
ret i64 %and
}
+define i64 @andn_i64_from_sub(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64_from_sub:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64_from_sub:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: ret
+ %neg = sub i64 0, %b
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: orn_i32:
; RV64I: # %bb.0:
Maybe it would make sense to add this pattern here too:
In this first attempt I have played it safe. I would be open to a more generic solution if you think that would be better. Maybe use the target hook
This seems completely wrong since they calculate different results. Perhaps you meant "and x (sub -1, y) -> andn x y"?? |
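To make the reviewer's point concrete, here is a small standalone C check (my own illustration, not part of the PR) showing that x & ~y and x & (0 - y) diverge already at x = y = 1:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* andn computes x & ~y, while the submitted pattern matches x & (0 - y). */
    uint8_t x = 1, y = 1;
    uint8_t not_form = x & (uint8_t)~y; /* ~1 = 0xFE, so 1 & 0xFE = 0 */
    uint8_t neg_form = x & (uint8_t)-y; /* -1 = 0xFF, so 1 & 0xFF = 1 */
    printf("x & ~y = %u, x & -y = %u\n", (unsigned)not_form, (unsigned)neg_form);
    assert(not_form != neg_form); /* the two forms disagree */
    return 0;
}

With the corrected form from the title, (sub -1, y), the two sides agree, because -1 - y is exactly ~y in two's complement.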
@jayfoad Yes, you are right. Sorry for the mistake, and thank you for noticing it.
This is the case for signed integers. I see that. What if we have unsigned integers? |
Fixed-bitwidth two's-complement add and subtract work exactly the same for signed and unsigned integers; that's why we don't have separate signed/unsigned opcodes for them (whereas division does need them: udiv and sdiv, for example).
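A quick exhaustive one-byte check illustrates this (my own sketch, not from the PR): wrapping negation produces the same bit pattern whether the operand is read as signed or unsigned.

#include <assert.h>
#include <stdint.h>

int main(void) {
    for (unsigned v = 0; v < 256; ++v) {
        /* Reinterpret the byte as a signed value in [-128, 127]. */
        int as_signed = (v < 128) ? (int)v : (int)v - 256;
        uint8_t unsigned_neg = (uint8_t)(0u - v);   /* unsigned wraparound */
        uint8_t signed_neg = (uint8_t)(-as_signed); /* conversion wraps mod 256 */
        assert(unsigned_neg == signed_neg);         /* identical bit patterns */
    }
    return 0;
}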
Isn't sub -1, x canonicalized to xor x, -1? |
Currently, I think it is not.
Furthermore, if the InstCombiner sees the |
But it should be: https://github.com/llvm/llvm-project/blob/3f63e1c834e000d4ea95d667ae224cc232927196/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L3969 |
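The identity behind that canonicalization is that in two's complement -1 - x equals ~x, so nothing is lost by rewriting to the xor form. A minimal sketch (my illustration) verifying it over one byte:

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* In two's complement, -1 - x == ~x for every x, which is why
       (sub -1, x) can always be rewritten as (xor x, -1). */
    for (unsigned v = 0; v < 256; ++v) {
        uint8_t x = (uint8_t)v;
        assert((uint8_t)(0xFFu - x) == (uint8_t)~x); /* 0xFF is -1 in 8 bits */
    }
    return 0;
}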
The added tests already produce |
I get it now, sorry. I still don't understand why the scenario shown in the issue doesn't work, though. Do you have any idea about that?
I think you have to match all 4 instructions. The full pattern is |
And maybe it will be enough to work at the IR level; if the suggested IR transformations are done, we will get
Sorry for my negligence, and thank you for your time and patience.
I don't think this can be optimized in IR. InstCombine will nearly always try to reduce critical-path depth if it doesn't increase the number of instructions. This change is only profitable if
Both InstCombine and DAGCombiner will want to turn
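As a sketch of the profitability argument (my own illustration; the assembly comment assumes a Zbb/Zbkb target, matching the tests in this PR):

#include <stdint.h>

/* Base RV32I/RV64I needs two instructions here (not + and); with Zbb/Zbkb
   the whole expression selects to a single andn, which is what makes
   keeping the ~y visible to instruction selection profitable. */
uint32_t not_form(uint32_t x, uint32_t y) {
    return x & ~y; /* Zbb/Zbkb: andn a0, a0, a1 */
}

/* Algebraically identical, since -1 - y == ~y in two's complement; both
   InstCombine and DAGCombiner prefer to rewrite this into the form above. */
uint32_t sub_form(uint32_t x, uint32_t y) {
    return x & (UINT32_C(0xFFFFFFFF) - y);
}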
Should fix the description. It still uses 0 in the pattern instead of -1. |
Try to address issue #125354.