Skip to content

Commit 1700567

Browse files
committed
Move to DAG combine, check for +zve32x
1 parent 3ec5d3b commit 1700567

File tree

5 files changed

+154
-221
lines changed

5 files changed

+154
-221
lines changed

llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp

Lines changed: 3 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include "llvm/IR/PatternMatch.h"
2626
#include "llvm/InitializePasses.h"
2727
#include "llvm/Pass.h"
28-
#include "llvm/Transforms/Utils/Local.h"
2928

3029
using namespace llvm;
3130

@@ -63,74 +62,10 @@ class RISCVCodeGenPrepare : public FunctionPass,
6362

6463
} // end anonymous namespace
6564

66-
// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
67-
// truncation. But RVV doesn't have truncation instructions for more than twice
68-
// the bitwidth.
69-
//
70-
// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
71-
//
72-
// vsetvli a0, zero, e32, m2, ta, ma
73-
// vnsrl.wi v12, v8, 0
74-
// vsetvli zero, zero, e16, m1, ta, ma
75-
// vnsrl.wi v8, v12, 0
76-
// vsetvli zero, zero, e8, mf2, ta, ma
77-
// vnsrl.wi v8, v8, 0
78-
//
79-
// So reverse the combine so we generate an vmseq/vmsne again:
80-
//
81-
// and (lshr (trunc X), ShAmt), 1
82-
// -->
83-
// zext (icmp ne (and X, (1 << ShAmt)), 0)
84-
//
85-
// and (lshr (not (trunc X)), ShAmt), 1
86-
// -->
87-
// zext (icmp eq (and X, (1 << ShAmt)), 0)
88-
static bool reverseZExtICmpCombine(BinaryOperator &BO) {
89-
using namespace PatternMatch;
90-
91-
assert(BO.getOpcode() == BinaryOperator::And);
92-
93-
if (!BO.getType()->isVectorTy())
94-
return false;
95-
const APInt *ShAmt;
96-
Value *Inner;
97-
if (!match(&BO,
98-
m_And(m_OneUse(m_LShr(m_OneUse(m_Value(Inner)), m_APInt(ShAmt))),
99-
m_One())))
100-
return false;
101-
102-
Value *X;
103-
bool IsNot;
104-
if (match(Inner, m_Not(m_Trunc(m_Value(X)))))
105-
IsNot = true;
106-
else if (match(Inner, m_Trunc(m_Value(X))))
107-
IsNot = false;
108-
else
109-
return false;
110-
111-
if (BO.getType()->getScalarSizeInBits() >=
112-
X->getType()->getScalarSizeInBits() / 2)
113-
return false;
114-
115-
IRBuilder<> Builder(&BO);
116-
Value *Res = Builder.CreateAnd(
117-
X, ConstantInt::get(X->getType(), 1 << ShAmt->getZExtValue()));
118-
Res = Builder.CreateICmp(IsNot ? CmpInst::Predicate::ICMP_EQ
119-
: CmpInst::Predicate::ICMP_NE,
120-
Res, ConstantInt::get(X->getType(), 0));
121-
Res = Builder.CreateZExt(Res, BO.getType());
122-
BO.replaceAllUsesWith(Res);
123-
RecursivelyDeleteTriviallyDeadInstructions(&BO);
124-
return true;
125-
}
126-
65+
// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
66+
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
67+
// the upper 32 bits with ones.
12768
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
128-
if (reverseZExtICmpCombine(BO))
129-
return true;
130-
131-
// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
132-
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
133-
// the upper 32 bits with ones.
13469
if (!ST->is64Bit())
13570
return false;
13671

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15040,6 +15040,68 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
1504015040
return combineTruncSelectToSMaxUSat(N, DAG);
1504115041
}
1504215042

15043+
// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15044+
// truncation. But RVV doesn't have truncation instructions for more than twice
15045+
// the bitwidth.
15046+
//
15047+
// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15048+
//
15049+
// vsetvli a0, zero, e32, m2, ta, ma
15050+
// vnsrl.wi v12, v8, 0
15051+
// vsetvli zero, zero, e16, m1, ta, ma
15052+
// vnsrl.wi v8, v12, 0
15053+
// vsetvli zero, zero, e8, mf2, ta, ma
15054+
// vnsrl.wi v8, v8, 0
15055+
//
15056+
// So reverse the combine so we generate an vmseq/vmsne again:
15057+
//
15058+
// and (lshr (trunc X), ShAmt), 1
15059+
// -->
15060+
// zext (icmp ne (and X, (1 << ShAmt)), 0)
15061+
//
15062+
// and (lshr (not (trunc X)), ShAmt), 1
15063+
// -->
15064+
// zext (icmp eq (and X, (1 << ShAmt)), 0)
15065+
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
15066+
const RISCVSubtarget &Subtarget) {
15067+
using namespace SDPatternMatch;
15068+
SDLoc DL(N);
15069+
15070+
if (!Subtarget.hasVInstructions())
15071+
return SDValue();
15072+
15073+
EVT VT = N->getValueType(0);
15074+
if (!VT.isVector())
15075+
return SDValue();
15076+
15077+
APInt ShAmt;
15078+
SDValue Inner;
15079+
if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
15080+
m_One())))
15081+
return SDValue();
15082+
15083+
SDValue X;
15084+
bool IsNot;
15085+
if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
15086+
IsNot = true;
15087+
else if (sd_match(Inner, m_Trunc(m_Value(X))))
15088+
IsNot = false;
15089+
else
15090+
return SDValue();
15091+
15092+
EVT WideVT = X.getValueType();
15093+
if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
15094+
return SDValue();
15095+
15096+
SDValue Res =
15097+
DAG.getNode(ISD::AND, DL, WideVT, X,
15098+
DAG.getConstant(1 << ShAmt.getZExtValue(), DL, WideVT));
15099+
Res = DAG.getSetCC(DL, WideVT.changeElementType(MVT::i1), Res,
15100+
DAG.getConstant(0, DL, WideVT),
15101+
IsNot ? ISD::SETEQ : ISD::SETNE);
15102+
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
15103+
}
15104+
1504315105
// Combines two comparison operation and logic operation to one selection
1504415106
// operation(min, max) and logic operation. Returns new constructed Node if
1504515107
// conditions for optimization are satisfied.
@@ -15067,6 +15129,9 @@ static SDValue performANDCombine(SDNode *N,
1506715129
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
1506815130
}
1506915131

15132+
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
15133+
return V;
15134+
1507015135
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1507115136
return V;
1507215137
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))

llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -498,84 +498,3 @@ vector.body: ; preds = %vector.body, %entry
498498
for.cond.cleanup: ; preds = %vector.body
499499
ret float %red
500500
}
501-
502-
define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
503-
; CHECK-LABEL: reverse_zexticmp_i16:
504-
; CHECK: # %bb.0:
505-
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
506-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
507-
; CHECK-NEXT: vsrl.vi v8, v8, 2
508-
; CHECK-NEXT: vand.vi v8, v8, 1
509-
; CHECK-NEXT: ret
510-
%1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
511-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
512-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
513-
ret <vscale x 1 x i8> %3
514-
}
515-
516-
define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
517-
; CHECK-LABEL: reverse_zexticmp_i32:
518-
; CHECK: # %bb.0:
519-
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
520-
; CHECK-NEXT: vand.vi v8, v8, 4
521-
; CHECK-NEXT: vmsne.vi v0, v8, 0
522-
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
523-
; CHECK-NEXT: vmv.v.i v8, 0
524-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
525-
; CHECK-NEXT: ret
526-
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
527-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
528-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
529-
ret <vscale x 1 x i8> %3
530-
}
531-
532-
define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
533-
; CHECK-LABEL: reverse_zexticmp_neg_i32:
534-
; CHECK: # %bb.0:
535-
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
536-
; CHECK-NEXT: vand.vi v8, v8, 4
537-
; CHECK-NEXT: vmseq.vi v0, v8, 0
538-
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
539-
; CHECK-NEXT: vmv.v.i v8, 0
540-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
541-
; CHECK-NEXT: ret
542-
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
543-
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
544-
%3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
545-
%4 = and <vscale x 1 x i8> %3, splat (i8 1)
546-
ret <vscale x 1 x i8> %4
547-
}
548-
549-
define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
550-
; CHECK-LABEL: reverse_zexticmp_i64:
551-
; CHECK: # %bb.0:
552-
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
553-
; CHECK-NEXT: vand.vi v8, v8, 4
554-
; CHECK-NEXT: vmsne.vi v0, v8, 0
555-
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
556-
; CHECK-NEXT: vmv.v.i v8, 0
557-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
558-
; CHECK-NEXT: ret
559-
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
560-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
561-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
562-
ret <vscale x 1 x i8> %3
563-
}
564-
565-
define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
566-
; CHECK-LABEL: reverse_zexticmp_neg_i64:
567-
; CHECK: # %bb.0:
568-
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
569-
; CHECK-NEXT: vand.vi v8, v8, 4
570-
; CHECK-NEXT: vmseq.vi v0, v8, 0
571-
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
572-
; CHECK-NEXT: vmv.v.i v8, 0
573-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
574-
; CHECK-NEXT: ret
575-
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
576-
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
577-
%3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
578-
%4 = and <vscale x 1 x i8> %3, splat (i8 1)
579-
ret <vscale x 1 x i8> %4
580-
}
581-

llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -528,75 +528,3 @@ vector.body: ; preds = %vector.body, %entry
528528
for.cond.cleanup: ; preds = %vector.body
529529
ret float %red
530530
}
531-
532-
define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
533-
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i16(
534-
; CHECK-SAME: <vscale x 1 x i16> [[X:%.*]]) #[[ATTR2]] {
535-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i16> [[X]] to <vscale x 1 x i8>
536-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
537-
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
538-
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
539-
;
540-
%1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
541-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
542-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
543-
ret <vscale x 1 x i8> %3
544-
}
545-
546-
define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
547-
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i32(
548-
; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
549-
; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
550-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i32> [[TMP1]], zeroinitializer
551-
; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
552-
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
553-
;
554-
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
555-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
556-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
557-
ret <vscale x 1 x i8> %3
558-
}
559-
560-
define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
561-
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(
562-
; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
563-
; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
564-
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i32> [[TMP1]], zeroinitializer
565-
; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
566-
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
567-
;
568-
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
569-
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
570-
%3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
571-
%4 = and <vscale x 1 x i8> %3, splat (i8 1)
572-
ret <vscale x 1 x i8> %4
573-
}
574-
575-
define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
576-
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i64(
577-
; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
578-
; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
579-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i64> [[TMP1]], zeroinitializer
580-
; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
581-
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
582-
;
583-
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
584-
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
585-
%3 = and <vscale x 1 x i8> %2, splat (i8 1)
586-
ret <vscale x 1 x i8> %3
587-
}
588-
589-
define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
590-
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(
591-
; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
592-
; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
593-
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i64> [[TMP1]], zeroinitializer
594-
; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
595-
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
596-
;
597-
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
598-
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
599-
%3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
600-
%4 = and <vscale x 1 x i8> %3, splat (i8 1)
601-
ret <vscale x 1 x i8> %4
602-
}

0 commit comments

Comments
 (0)