Skip to content

Commit 5d05cdf

Browse files
committed
[RISCV] Copy isUnneededShiftMask from X86.
In d2927f7, I added patterns to remove (and X, 31) from sllw/srlw/sraw shift amounts. There is code in SelectionDAGISel.cpp that knows to use computeKnownBits to fill in bits of the mask that were removed by SimplifyDemandedBits because they were known to be zero.

The non-W shift patterns use immbottomxlenset, which allows the mask to have more than log2(xlen) trailing ones but doesn't call computeKnownBits to fill in bits of the mask that may have been cleared by SimplifyDemandedBits.

This patch copies code from X86 that handles more than log2(xlen) bottom bits being set and uses computeKnownBits to fill in missing bits before counting.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D95422
1 parent b7d6324 commit 5d05cdf

File tree

6 files changed

+401
-308
lines changed

6 files changed

+401
-308
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/IR/IntrinsicsRISCV.h"
1818
#include "llvm/Support/Alignment.h"
1919
#include "llvm/Support/Debug.h"
20+
#include "llvm/Support/KnownBits.h"
2021
#include "llvm/Support/MathExtras.h"
2122
#include "llvm/Support/raw_ostream.h"
2223

@@ -821,6 +822,21 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
821822
return false;
822823
}
823824

825+
// Helper to detect unneeded AND instructions on shift amounts. Called
826+
// from PatFrags in tablegen.
//
// N must be an ISD::AND node whose operand 1 is a constant mask. Width is the
// number of low bits of the shift amount that must be left untouched by the
// mask. Returns true when the mask, possibly extended with bits of operand 0
// that are known to be zero, has at least Width trailing ones.
827+
bool RISCVDAGToDAGISel::isUnneededShiftMask(SDNode *N, unsigned Width) const {
828+
assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
829+
// Width must be at least 5, and the AND's result type must have at least
// 2^Width bits.
assert(Width >= 5 && N->getValueSizeInBits(0) >= (1 << Width) &&
830+
"Unexpected width");
831+
const APInt &Val = N->getConstantOperandAPInt(1);
832+
833+
// Fast path: the constant alone already has Width trailing ones, so the
// known-bits query below is not needed.
if (Val.countTrailingOnes() >= Width)
834+
return true;
835+
836+
// SimplifyDemandedBits may have cleared mask bits where the other operand is
// known to be zero. OR those known-zero bits back into the mask before
// counting trailing ones again.
APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
837+
return Mask.countTrailingOnes() >= Width;
838+
}
839+
824840
// Match (srl (and val, mask), imm) where the result would be a
825841
// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
826842
// is equivalent to this (SimplifyDemandedBits may have removed lower bits

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
4545

4646
bool SelectAddrFI(SDValue Addr, SDValue &Base);
4747

48+
bool isUnneededShiftMask(SDNode *N, unsigned Width) const;
49+
4850
bool MatchSRLIW(SDNode *N) const;
4951
bool MatchSLOI(SDNode *N) const;
5052
bool MatchSROI(SDNode *N) const;

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,6 @@ def ixlenimm_li : Operand<XLenVT> {
289289
// Standalone (codegen-only) immleaf patterns.
290290
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
291291
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
292-
// A mask value that won't affect significant shift bits.
293-
def immbottomxlenset : ImmLeaf<XLenVT, [{
294-
if (Subtarget->is64Bit())
295-
return countTrailingOnes<uint64_t>(Imm) >= 6;
296-
return countTrailingOnes<uint64_t>(Imm) >= 5;
297-
}]>;
298292

299293
// A 6-bit constant greater than 32.
300294
def uimm6gt32 : ImmLeaf<XLenVT, [{
@@ -901,14 +895,21 @@ def : PatGprUimmLog2XLen<sra, SRAI>;
901895
// typically introduced when the legalizer promotes the shift amount and
902896
// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
903897
// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
898+
// Matches (and $lhs, imm) only when isUnneededShiftMask accepts the AND for
// an XLEN-wide shift amount: a width of 6 bits on 64-bit subtargets and 5 bits
// otherwise.
def shiftMaskXLen : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
899+
return isUnneededShiftMask(N, Subtarget->is64Bit() ? 6 : 5);
900+
}]>;
901+
// Matches (and $lhs, imm) only when isUnneededShiftMask accepts the AND for
// a fixed shift-amount width of 5 bits, independent of the subtarget's XLEN.
def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
902+
return isUnneededShiftMask(N, 5);
903+
}]>;
904+
904905
class shiftop<SDPatternOperator operator>
905906
: PatFrags<(ops node:$val, node:$count),
906907
[(operator node:$val, node:$count),
907-
(operator node:$val, (and node:$count, immbottomxlenset))]>;
908+
(operator node:$val, (shiftMaskXLen node:$count))]>;
908909
class shiftopw<SDPatternOperator operator>
909910
: PatFrags<(ops node:$val, node:$count),
910911
[(operator node:$val, node:$count),
911-
(operator node:$val, (and node:$count, (XLenVT 31)))]>;
912+
(operator node:$val, (shiftMask32 node:$count))]>;
912913

913914
def : PatGprGpr<shiftop<shl>, SLL>;
914915
def : PatGprGpr<shiftop<srl>, SRL>;

llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind
2626
; RV32IA: # %bb.0:
2727
; RV32IA-NEXT: andi a3, a0, -4
2828
; RV32IA-NEXT: slli a0, a0, 3
29-
; RV32IA-NEXT: andi a0, a0, 24
3029
; RV32IA-NEXT: addi a4, zero, 255
3130
; RV32IA-NEXT: sll a4, a4, a0
3231
; RV32IA-NEXT: andi a1, a1, 255
@@ -103,7 +102,6 @@ define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
103102
; RV32IA: # %bb.0:
104103
; RV32IA-NEXT: andi a3, a0, -4
105104
; RV32IA-NEXT: slli a0, a0, 3
106-
; RV32IA-NEXT: andi a0, a0, 24
107105
; RV32IA-NEXT: addi a4, zero, 255
108106
; RV32IA-NEXT: sll a4, a4, a0
109107
; RV32IA-NEXT: andi a1, a1, 255
@@ -180,7 +178,6 @@ define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
180178
; RV32IA: # %bb.0:
181179
; RV32IA-NEXT: andi a3, a0, -4
182180
; RV32IA-NEXT: slli a0, a0, 3
183-
; RV32IA-NEXT: andi a0, a0, 24
184181
; RV32IA-NEXT: addi a4, zero, 255
185182
; RV32IA-NEXT: sll a4, a4, a0
186183
; RV32IA-NEXT: andi a1, a1, 255
@@ -257,7 +254,6 @@ define void @cmpxchg_i8_release_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
257254
; RV32IA: # %bb.0:
258255
; RV32IA-NEXT: andi a3, a0, -4
259256
; RV32IA-NEXT: slli a0, a0, 3
260-
; RV32IA-NEXT: andi a0, a0, 24
261257
; RV32IA-NEXT: addi a4, zero, 255
262258
; RV32IA-NEXT: sll a4, a4, a0
263259
; RV32IA-NEXT: andi a1, a1, 255
@@ -334,7 +330,6 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
334330
; RV32IA: # %bb.0:
335331
; RV32IA-NEXT: andi a3, a0, -4
336332
; RV32IA-NEXT: slli a0, a0, 3
337-
; RV32IA-NEXT: andi a0, a0, 24
338333
; RV32IA-NEXT: addi a4, zero, 255
339334
; RV32IA-NEXT: sll a4, a4, a0
340335
; RV32IA-NEXT: andi a1, a1, 255
@@ -411,7 +406,6 @@ define void @cmpxchg_i8_acq_rel_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
411406
; RV32IA: # %bb.0:
412407
; RV32IA-NEXT: andi a3, a0, -4
413408
; RV32IA-NEXT: slli a0, a0, 3
414-
; RV32IA-NEXT: andi a0, a0, 24
415409
; RV32IA-NEXT: addi a4, zero, 255
416410
; RV32IA-NEXT: sll a4, a4, a0
417411
; RV32IA-NEXT: andi a1, a1, 255
@@ -488,7 +482,6 @@ define void @cmpxchg_i8_acq_rel_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
488482
; RV32IA: # %bb.0:
489483
; RV32IA-NEXT: andi a3, a0, -4
490484
; RV32IA-NEXT: slli a0, a0, 3
491-
; RV32IA-NEXT: andi a0, a0, 24
492485
; RV32IA-NEXT: addi a4, zero, 255
493486
; RV32IA-NEXT: sll a4, a4, a0
494487
; RV32IA-NEXT: andi a1, a1, 255
@@ -565,7 +558,6 @@ define void @cmpxchg_i8_seq_cst_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
565558
; RV32IA: # %bb.0:
566559
; RV32IA-NEXT: andi a3, a0, -4
567560
; RV32IA-NEXT: slli a0, a0, 3
568-
; RV32IA-NEXT: andi a0, a0, 24
569561
; RV32IA-NEXT: addi a4, zero, 255
570562
; RV32IA-NEXT: sll a4, a4, a0
571563
; RV32IA-NEXT: andi a1, a1, 255
@@ -642,7 +634,6 @@ define void @cmpxchg_i8_seq_cst_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
642634
; RV32IA: # %bb.0:
643635
; RV32IA-NEXT: andi a3, a0, -4
644636
; RV32IA-NEXT: slli a0, a0, 3
645-
; RV32IA-NEXT: andi a0, a0, 24
646637
; RV32IA-NEXT: addi a4, zero, 255
647638
; RV32IA-NEXT: sll a4, a4, a0
648639
; RV32IA-NEXT: andi a1, a1, 255
@@ -719,7 +710,6 @@ define void @cmpxchg_i8_seq_cst_seq_cst(i8* %ptr, i8 %cmp, i8 %val) nounwind {
719710
; RV32IA: # %bb.0:
720711
; RV32IA-NEXT: andi a3, a0, -4
721712
; RV32IA-NEXT: slli a0, a0, 3
722-
; RV32IA-NEXT: andi a0, a0, 24
723713
; RV32IA-NEXT: addi a4, zero, 255
724714
; RV32IA-NEXT: sll a4, a4, a0
725715
; RV32IA-NEXT: andi a1, a1, 255
@@ -796,7 +786,6 @@ define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) noun
796786
; RV32IA: # %bb.0:
797787
; RV32IA-NEXT: andi a3, a0, -4
798788
; RV32IA-NEXT: slli a0, a0, 3
799-
; RV32IA-NEXT: andi a0, a0, 24
800789
; RV32IA-NEXT: lui a4, 16
801790
; RV32IA-NEXT: addi a4, a4, -1
802791
; RV32IA-NEXT: sll a5, a4, a0
@@ -875,7 +864,6 @@ define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
875864
; RV32IA: # %bb.0:
876865
; RV32IA-NEXT: andi a3, a0, -4
877866
; RV32IA-NEXT: slli a0, a0, 3
878-
; RV32IA-NEXT: andi a0, a0, 24
879867
; RV32IA-NEXT: lui a4, 16
880868
; RV32IA-NEXT: addi a4, a4, -1
881869
; RV32IA-NEXT: sll a5, a4, a0
@@ -954,7 +942,6 @@ define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
954942
; RV32IA: # %bb.0:
955943
; RV32IA-NEXT: andi a3, a0, -4
956944
; RV32IA-NEXT: slli a0, a0, 3
957-
; RV32IA-NEXT: andi a0, a0, 24
958945
; RV32IA-NEXT: lui a4, 16
959946
; RV32IA-NEXT: addi a4, a4, -1
960947
; RV32IA-NEXT: sll a5, a4, a0
@@ -1033,7 +1020,6 @@ define void @cmpxchg_i16_release_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
10331020
; RV32IA: # %bb.0:
10341021
; RV32IA-NEXT: andi a3, a0, -4
10351022
; RV32IA-NEXT: slli a0, a0, 3
1036-
; RV32IA-NEXT: andi a0, a0, 24
10371023
; RV32IA-NEXT: lui a4, 16
10381024
; RV32IA-NEXT: addi a4, a4, -1
10391025
; RV32IA-NEXT: sll a5, a4, a0
@@ -1112,7 +1098,6 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
11121098
; RV32IA: # %bb.0:
11131099
; RV32IA-NEXT: andi a3, a0, -4
11141100
; RV32IA-NEXT: slli a0, a0, 3
1115-
; RV32IA-NEXT: andi a0, a0, 24
11161101
; RV32IA-NEXT: lui a4, 16
11171102
; RV32IA-NEXT: addi a4, a4, -1
11181103
; RV32IA-NEXT: sll a5, a4, a0
@@ -1191,7 +1176,6 @@ define void @cmpxchg_i16_acq_rel_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
11911176
; RV32IA: # %bb.0:
11921177
; RV32IA-NEXT: andi a3, a0, -4
11931178
; RV32IA-NEXT: slli a0, a0, 3
1194-
; RV32IA-NEXT: andi a0, a0, 24
11951179
; RV32IA-NEXT: lui a4, 16
11961180
; RV32IA-NEXT: addi a4, a4, -1
11971181
; RV32IA-NEXT: sll a5, a4, a0
@@ -1270,7 +1254,6 @@ define void @cmpxchg_i16_acq_rel_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
12701254
; RV32IA: # %bb.0:
12711255
; RV32IA-NEXT: andi a3, a0, -4
12721256
; RV32IA-NEXT: slli a0, a0, 3
1273-
; RV32IA-NEXT: andi a0, a0, 24
12741257
; RV32IA-NEXT: lui a4, 16
12751258
; RV32IA-NEXT: addi a4, a4, -1
12761259
; RV32IA-NEXT: sll a5, a4, a0
@@ -1349,7 +1332,6 @@ define void @cmpxchg_i16_seq_cst_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
13491332
; RV32IA: # %bb.0:
13501333
; RV32IA-NEXT: andi a3, a0, -4
13511334
; RV32IA-NEXT: slli a0, a0, 3
1352-
; RV32IA-NEXT: andi a0, a0, 24
13531335
; RV32IA-NEXT: lui a4, 16
13541336
; RV32IA-NEXT: addi a4, a4, -1
13551337
; RV32IA-NEXT: sll a5, a4, a0
@@ -1428,7 +1410,6 @@ define void @cmpxchg_i16_seq_cst_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
14281410
; RV32IA: # %bb.0:
14291411
; RV32IA-NEXT: andi a3, a0, -4
14301412
; RV32IA-NEXT: slli a0, a0, 3
1431-
; RV32IA-NEXT: andi a0, a0, 24
14321413
; RV32IA-NEXT: lui a4, 16
14331414
; RV32IA-NEXT: addi a4, a4, -1
14341415
; RV32IA-NEXT: sll a5, a4, a0
@@ -1507,7 +1488,6 @@ define void @cmpxchg_i16_seq_cst_seq_cst(i16* %ptr, i16 %cmp, i16 %val) nounwind
15071488
; RV32IA: # %bb.0:
15081489
; RV32IA-NEXT: andi a3, a0, -4
15091490
; RV32IA-NEXT: slli a0, a0, 3
1510-
; RV32IA-NEXT: andi a0, a0, 24
15111491
; RV32IA-NEXT: lui a4, 16
15121492
; RV32IA-NEXT: addi a4, a4, -1
15131493
; RV32IA-NEXT: sll a5, a4, a0

0 commit comments

Comments
 (0)