Skip to content

Commit 3210b13

Browse files
topperc authored and aokblast committed
[RISCV] Use shiftMaskXLen for one of the BCLR patterns. (llvm#164206)
This allows us to remove AND from the shift amount when DAG combine has replaced (not (shl 1, X)) with (rotl -2, X). SimplifyDemandedBits will often simplify the rotl case on its own, but not if the masked shift amount has multiple users.
1 parent 98be0b4 commit 3210b13

File tree

3 files changed

+70
-18
lines changed

3 files changed

+70
-18
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -527,8 +527,8 @@ def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
527527
let Predicates = [HasStdExtZbs] in {
528528
def : Pat<(XLenVT (and (not (shl 1, shiftMaskXLen:$rs2)), GPR:$rs1)),
529529
(BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
530-
def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
531-
(BCLR GPR:$rs1, GPR:$rs2)>;
530+
def : Pat<(XLenVT (and (rotl -2, shiftMaskXLen:$rs2), GPR:$rs1)),
531+
(BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
532532
def : Pat<(XLenVT (or (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),
533533
(BSET GPR:$rs1, shiftMaskXLen:$rs2)>;
534534
def : Pat<(XLenVT (xor (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),

llvm/test/CodeGen/RISCV/rv32zbs.ll

Lines changed: 38 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,32 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind {
4545
ret i32 %and1
4646
}
4747

48+
define i32 @bclr_i32_mask_multiple(i32 %a, i32 %b, i32 %shamt) nounwind {
49+
; RV32I-LABEL: bclr_i32_mask_multiple:
50+
; RV32I: # %bb.0:
51+
; RV32I-NEXT: li a3, 1
52+
; RV32I-NEXT: sll a2, a3, a2
53+
; RV32I-NEXT: not a3, a2
54+
; RV32I-NEXT: and a0, a3, a0
55+
; RV32I-NEXT: or a1, a1, a2
56+
; RV32I-NEXT: add a0, a0, a1
57+
; RV32I-NEXT: ret
58+
;
59+
; RV32ZBS-LABEL: bclr_i32_mask_multiple:
60+
; RV32ZBS: # %bb.0:
61+
; RV32ZBS-NEXT: bclr a0, a0, a2
62+
; RV32ZBS-NEXT: bset a1, a1, a2
63+
; RV32ZBS-NEXT: add a0, a0, a1
64+
; RV32ZBS-NEXT: ret
65+
%shamt_masked = and i32 %shamt, 63
66+
%shl = shl nuw i32 1, %shamt_masked
67+
%neg = xor i32 %shl, -1
68+
%and = and i32 %neg, %a
69+
%or = or i32 %b, %shl
70+
%c = add i32 %and, %or
71+
ret i32 %c
72+
}
73+
4874
define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
4975
; RV32I-LABEL: bclr_i64:
5076
; RV32I: # %bb.0:
@@ -301,17 +327,17 @@ define i64 @bext_i64(i64 %a, i64 %b) nounwind {
301327
; CHECK: # %bb.0:
302328
; CHECK-NEXT: andi a3, a2, 63
303329
; CHECK-NEXT: addi a4, a3, -32
304-
; CHECK-NEXT: bltz a4, .LBB12_2
330+
; CHECK-NEXT: bltz a4, .LBB13_2
305331
; CHECK-NEXT: # %bb.1:
306332
; CHECK-NEXT: srl a0, a1, a3
307-
; CHECK-NEXT: j .LBB12_3
308-
; CHECK-NEXT: .LBB12_2:
333+
; CHECK-NEXT: j .LBB13_3
334+
; CHECK-NEXT: .LBB13_2:
309335
; CHECK-NEXT: srl a0, a0, a2
310336
; CHECK-NEXT: slli a1, a1, 1
311337
; CHECK-NEXT: not a2, a3
312338
; CHECK-NEXT: sll a1, a1, a2
313339
; CHECK-NEXT: or a0, a0, a1
314-
; CHECK-NEXT: .LBB12_3:
340+
; CHECK-NEXT: .LBB13_3:
315341
; CHECK-NEXT: andi a0, a0, 1
316342
; CHECK-NEXT: li a1, 0
317343
; CHECK-NEXT: ret
@@ -789,17 +815,17 @@ define i64 @bset_trailing_ones_i64_mask(i64 %a) nounwind {
789815
; CHECK-NEXT: li a3, -1
790816
; CHECK-NEXT: addi a1, a2, -32
791817
; CHECK-NEXT: sll a0, a3, a0
792-
; CHECK-NEXT: bltz a1, .LBB43_2
818+
; CHECK-NEXT: bltz a1, .LBB44_2
793819
; CHECK-NEXT: # %bb.1:
794820
; CHECK-NEXT: sll a2, a3, a2
795-
; CHECK-NEXT: j .LBB43_3
796-
; CHECK-NEXT: .LBB43_2:
821+
; CHECK-NEXT: j .LBB44_3
822+
; CHECK-NEXT: .LBB44_2:
797823
; CHECK-NEXT: not a2, a2
798824
; CHECK-NEXT: lui a3, 524288
799825
; CHECK-NEXT: addi a3, a3, -1
800826
; CHECK-NEXT: srl a2, a3, a2
801827
; CHECK-NEXT: or a2, a0, a2
802-
; CHECK-NEXT: .LBB43_3:
828+
; CHECK-NEXT: .LBB44_3:
803829
; CHECK-NEXT: srai a1, a1, 31
804830
; CHECK-NEXT: and a0, a1, a0
805831
; CHECK-NEXT: not a1, a2
@@ -817,17 +843,17 @@ define i64 @bset_trailing_ones_i64_no_mask(i64 %a) nounwind {
817843
; CHECK-NEXT: li a1, -1
818844
; CHECK-NEXT: addi a2, a0, -32
819845
; CHECK-NEXT: sll a1, a1, a0
820-
; CHECK-NEXT: bltz a2, .LBB44_2
846+
; CHECK-NEXT: bltz a2, .LBB45_2
821847
; CHECK-NEXT: # %bb.1:
822848
; CHECK-NEXT: mv a0, a1
823-
; CHECK-NEXT: j .LBB44_3
824-
; CHECK-NEXT: .LBB44_2:
849+
; CHECK-NEXT: j .LBB45_3
850+
; CHECK-NEXT: .LBB45_2:
825851
; CHECK-NEXT: not a0, a0
826852
; CHECK-NEXT: lui a3, 524288
827853
; CHECK-NEXT: addi a3, a3, -1
828854
; CHECK-NEXT: srl a0, a3, a0
829855
; CHECK-NEXT: or a0, a1, a0
830-
; CHECK-NEXT: .LBB44_3:
856+
; CHECK-NEXT: .LBB45_3:
831857
; CHECK-NEXT: srai a2, a2, 31
832858
; CHECK-NEXT: and a2, a2, a1
833859
; CHECK-NEXT: not a1, a0

llvm/test/CodeGen/RISCV/rv64zbs.ll

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,32 @@ define i64 @bclr_i64_no_mask(i64 %a, i64 %b) nounwind {
110110
ret i64 %and1
111111
}
112112

113+
define i64 @bclr_i64_mask_multiple(i64 %a, i64 %b, i64 %shamt) nounwind {
114+
; RV64I-LABEL: bclr_i64_mask_multiple:
115+
; RV64I: # %bb.0:
116+
; RV64I-NEXT: li a3, 1
117+
; RV64I-NEXT: sll a2, a3, a2
118+
; RV64I-NEXT: not a3, a2
119+
; RV64I-NEXT: and a0, a3, a0
120+
; RV64I-NEXT: or a1, a1, a2
121+
; RV64I-NEXT: add a0, a0, a1
122+
; RV64I-NEXT: ret
123+
;
124+
; RV64ZBS-LABEL: bclr_i64_mask_multiple:
125+
; RV64ZBS: # %bb.0:
126+
; RV64ZBS-NEXT: bclr a0, a0, a2
127+
; RV64ZBS-NEXT: bset a1, a1, a2
128+
; RV64ZBS-NEXT: add a0, a0, a1
129+
; RV64ZBS-NEXT: ret
130+
%shamt_masked = and i64 %shamt, 63
131+
%shl = shl nuw i64 1, %shamt_masked
132+
%neg = xor i64 %shl, -1
133+
%and = and i64 %neg, %a
134+
%or = or i64 %b, %shl
135+
%c = add i64 %and, %or
136+
ret i64 %c
137+
}
138+
113139
define signext i32 @bset_i32(i32 signext %a, i32 signext %b) nounwind {
114140
; RV64I-LABEL: bset_i32:
115141
; RV64I: # %bb.0:
@@ -372,19 +398,19 @@ define void @bext_i32_trunc(i32 signext %0, i32 signext %1) {
372398
; RV64I: # %bb.0:
373399
; RV64I-NEXT: srlw a0, a0, a1
374400
; RV64I-NEXT: andi a0, a0, 1
375-
; RV64I-NEXT: beqz a0, .LBB19_2
401+
; RV64I-NEXT: beqz a0, .LBB20_2
376402
; RV64I-NEXT: # %bb.1:
377403
; RV64I-NEXT: ret
378-
; RV64I-NEXT: .LBB19_2:
404+
; RV64I-NEXT: .LBB20_2:
379405
; RV64I-NEXT: tail bar
380406
;
381407
; RV64ZBS-LABEL: bext_i32_trunc:
382408
; RV64ZBS: # %bb.0:
383409
; RV64ZBS-NEXT: bext a0, a0, a1
384-
; RV64ZBS-NEXT: beqz a0, .LBB19_2
410+
; RV64ZBS-NEXT: beqz a0, .LBB20_2
385411
; RV64ZBS-NEXT: # %bb.1:
386412
; RV64ZBS-NEXT: ret
387-
; RV64ZBS-NEXT: .LBB19_2:
413+
; RV64ZBS-NEXT: .LBB20_2:
388414
; RV64ZBS-NEXT: tail bar
389415
%3 = shl i32 1, %1
390416
%4 = and i32 %3, %0

0 commit comments

Comments
 (0)