[RISCV] Use shiftMaskXLen for one of the BCLR patterns. #164206
Conversation
This allows us to remove the AND from the shift amount when DAG combine has replaced (not (shl 1, X)) with (rotl -2, X). SimplifyDemandedBits will often simplify the rotl case on its own, but not if the masked shift amount has multiple users. This will need to be rebased if llvm#164050 goes in first.
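For reference, a minimal C sketch (not part of the patch; rotl32 is a hypothetical helper modeling the rotl DAG node on a 32-bit XLen) checking the identity the combine relies on, namely that (rotl -2, X) produces the same mask as (not (shl 1, X)):

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical helper: rotate-left of a 32-bit value. */
static uint32_t rotl32(uint32_t v, unsigned s) {
    s &= 31;
    return s == 0 ? v : (v << s) | (v >> (32 - s));
}

int main(void) {
    for (unsigned x = 0; x < 32; x++) {
        /* -2 is ~1 (0b...11110); rotating it left by x moves the lone
           zero bit to position x, so the result equals ~(1 << x). */
        assert(rotl32(~UINT32_C(1), x) == ~(UINT32_C(1) << x));
    }
    return 0;
}
```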
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes: This allows us to remove the AND from the shift amount when DAG combine has replaced (not (shl 1, X)) with (rotl -2, X). SimplifyDemandedBits will often simplify the rotl case on its own, but not if the masked shift amount has multiple users. This will need to be rebased if #164050 goes in first.
Full diff: https://github.com/llvm/llvm-project/pull/164206.diff
3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 57fbaa04ec687..f31ba17d6112d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -527,7 +527,7 @@ def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
let Predicates = [HasStdExtZbs] in {
def : Pat<(XLenVT (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
(BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
+def : Pat<(XLenVT (and (shiftop<rotl> -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BCLR GPR:$rs1, GPR:$rs2)>;
def : Pat<(XLenVT (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BSET GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll
index dcb70f88fd4ac..f9527ef79272b 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll
@@ -45,6 +45,32 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind {
ret i32 %and1
}
+define i32 @bclr_i32_mask_multiple(i32 %a, i32 %b, i32 %shamt) nounwind {
+; RV32I-LABEL: bclr_i32_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a3, 1
+; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: not a3, a2
+; RV32I-NEXT: and a0, a3, a0
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBS-LABEL: bclr_i32_mask_multiple:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: bclr a0, a0, a2
+; RV32ZBS-NEXT: bset a1, a1, a2
+; RV32ZBS-NEXT: add a0, a0, a1
+; RV32ZBS-NEXT: ret
+ %shamt_masked = and i32 %shamt, 63
+ %shl = shl nuw i32 1, %shamt_masked
+ %neg = xor i32 %shl, -1
+ %and = and i32 %neg, %a
+ %or = or i32 %b, %shl
+ %c = add i32 %and, %or
+ ret i32 %c
+}
+
define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: bclr_i64:
; RV32I: # %bb.0:
@@ -301,17 +327,17 @@ define i64 @bext_i64(i64 %a, i64 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a2, 63
; CHECK-NEXT: addi a4, a3, -32
-; CHECK-NEXT: bltz a4, .LBB12_2
+; CHECK-NEXT: bltz a4, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srl a0, a1, a3
-; CHECK-NEXT: j .LBB12_3
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: j .LBB13_3
+; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: srl a0, a0, a2
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: not a2, a3
; CHECK-NEXT: sll a1, a1, a2
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: .LBB12_3:
+; CHECK-NEXT: .LBB13_3:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: ret
@@ -789,17 +815,17 @@ define i64 @bset_trailing_ones_i64_mask(i64 %a) nounwind {
; CHECK-NEXT: li a3, -1
; CHECK-NEXT: addi a1, a2, -32
; CHECK-NEXT: sll a0, a3, a0
-; CHECK-NEXT: bltz a1, .LBB43_2
+; CHECK-NEXT: bltz a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: sll a2, a3, a2
-; CHECK-NEXT: j .LBB43_3
-; CHECK-NEXT: .LBB43_2:
+; CHECK-NEXT: j .LBB44_3
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: not a2, a2
; CHECK-NEXT: lui a3, 524288
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: srl a2, a3, a2
; CHECK-NEXT: or a2, a0, a2
-; CHECK-NEXT: .LBB43_3:
+; CHECK-NEXT: .LBB44_3:
; CHECK-NEXT: srai a1, a1, 31
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: not a1, a2
@@ -817,17 +843,17 @@ define i64 @bset_trailing_ones_i64_no_mask(i64 %a) nounwind {
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: addi a2, a0, -32
; CHECK-NEXT: sll a1, a1, a0
-; CHECK-NEXT: bltz a2, .LBB44_2
+; CHECK-NEXT: bltz a2, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: j .LBB44_3
-; CHECK-NEXT: .LBB44_2:
+; CHECK-NEXT: j .LBB45_3
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: not a0, a0
; CHECK-NEXT: lui a3, 524288
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: srl a0, a3, a0
; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: .LBB44_3:
+; CHECK-NEXT: .LBB45_3:
; CHECK-NEXT: srai a2, a2, 31
; CHECK-NEXT: and a2, a2, a1
; CHECK-NEXT: not a1, a0
diff --git a/llvm/test/CodeGen/RISCV/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64zbs.ll
index b4edcf6cc55cf..d42bc8e128082 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbs.ll
@@ -110,6 +110,32 @@ define i64 @bclr_i64_no_mask(i64 %a, i64 %b) nounwind {
ret i64 %and1
}
+define i64 @bclr_i64_mask_multiple(i64 %a, i64 %b, i64 %shamt) nounwind {
+; RV64I-LABEL: bclr_i64_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a3, 1
+; RV64I-NEXT: sll a2, a3, a2
+; RV64I-NEXT: not a3, a2
+; RV64I-NEXT: and a0, a3, a0
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBS-LABEL: bclr_i64_mask_multiple:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: bclr a0, a0, a2
+; RV64ZBS-NEXT: bset a1, a1, a2
+; RV64ZBS-NEXT: add a0, a0, a1
+; RV64ZBS-NEXT: ret
+ %shamt_masked = and i64 %shamt, 63
+ %shl = shl nuw i64 1, %shamt_masked
+ %neg = xor i64 %shl, -1
+ %and = and i64 %neg, %a
+ %or = or i64 %b, %shl
+ %c = add i64 %and, %or
+ ret i64 %c
+}
+
define signext i32 @bset_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: bset_i32:
; RV64I: # %bb.0:
@@ -372,19 +398,19 @@ define void @bext_i32_trunc(i32 signext %0, i32 signext %1) {
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: beqz a0, .LBB19_2
+; RV64I-NEXT: beqz a0, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB19_2:
+; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: tail bar
;
; RV64ZBS-LABEL: bext_i32_trunc:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: bext a0, a0, a1
-; RV64ZBS-NEXT: beqz a0, .LBB19_2
+; RV64ZBS-NEXT: beqz a0, .LBB20_2
; RV64ZBS-NEXT: # %bb.1:
; RV64ZBS-NEXT: ret
-; RV64ZBS-NEXT: .LBB19_2:
+; RV64ZBS-NEXT: .LBB20_2:
; RV64ZBS-NEXT: tail bar
%3 = shl i32 1, %1
%4 = and i32 %3, %0
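For context, the new *_mask_multiple tests correspond to source along the lines of the C sketch below (function and parameter names are illustrative, not taken from the patch). The masked shift amount feeds both the clear and the set, so SimplifyDemandedBits cannot strip the mask and the pattern must absorb it instead:

```c
#include <stdint.h>

/* Illustrative analogue of bclr_i64_mask_multiple: the masked shift
   amount has two users (the bclr-style clear and the bset-style set),
   so the AND with 63 survives into instruction selection unless the
   selection pattern itself accounts for the masking. */
uint64_t bclr_bset_multiple(uint64_t a, uint64_t b, uint64_t shamt) {
    uint64_t bit = UINT64_C(1) << (shamt & 63);
    return (a & ~bit) + (b | bit);
}
```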
LGTM.
LGTM.