Skip to content

Commit e5bb946

Browse files
fhossein-quicpkarvetiSergeiYLarinPavani Karveti
authored
Bug fixes for ISelLowering for HVX (#164416)
1. createHvxPrefixPred was computing an invalid byte count for small predicate types, leading to a crash during instruction selection. 2. HexagonTargetLowering::SplitHvxMemOp assumed the memory vector type is always simple. This patch adds a guard to avoid processing non-simple vector types, which can lead to failure. Patch By: Fateme Hosseini Co-authored-by: pavani karveti <[email protected]> Co-authored-by: Sergei Larin <[email protected]> Co-authored-by: Pavani Karveti <[email protected]>
1 parent d30bd27 commit e5bb946

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,8 +1061,11 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
10611061
SDValue W0 = isUndef(PredV)
10621062
? DAG.getUNDEF(MVT::i64)
10631063
: DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1064-
Words[IdxW].push_back(HiHalf(W0, DAG));
1065-
Words[IdxW].push_back(LoHalf(W0, DAG));
1064+
if (Bytes < BitBytes) {
1065+
Words[IdxW].push_back(HiHalf(W0, DAG));
1066+
Words[IdxW].push_back(LoHalf(W0, DAG));
1067+
} else
1068+
Words[IdxW].push_back(W0);
10661069

10671070
while (Bytes < BitBytes) {
10681071
IdxW ^= 1;
@@ -1083,7 +1086,26 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
10831086
Bytes *= 2;
10841087
}
10851088

1089+
while (Bytes > BitBytes) {
1090+
IdxW ^= 1;
1091+
Words[IdxW].clear();
1092+
1093+
if (Bytes <= 4) {
1094+
for (const SDValue &W : Words[IdxW ^ 1]) {
1095+
SDValue T = contractPredicate(W, dl, DAG);
1096+
Words[IdxW].push_back(T);
1097+
}
1098+
} else {
1099+
for (const SDValue &W : Words[IdxW ^ 1]) {
1100+
Words[IdxW].push_back(W);
1101+
}
1102+
}
1103+
Bytes /= 2;
1104+
}
1105+
10861106
assert(Bytes == BitBytes);
1107+
if (BitBytes == 1 && PredTy == MVT::v2i1)
1108+
ByteTy = MVT::getVectorVT(MVT::i16, HwLen);
10871109

10881110
SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
10891111
SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
@@ -3157,6 +3179,9 @@ SDValue
31573179
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
31583180
auto *MemN = cast<MemSDNode>(Op.getNode());
31593181

3182+
if (!MemN->getMemoryVT().isSimple())
3183+
return Op;
3184+
31603185
MVT MemTy = MemN->getMemoryVT().getSimpleVT();
31613186
if (!isHvxPairTy(MemTy))
31623187
return Op;
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
;; REQUIRES: asserts
2+
;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
3+
;; Sanity check for lowering masked scatter without assertion errors.
4+
5+
define void @outer_product(ptr %aptr, ptr %bptr, ptr %cptr, i32 %T, i32 %W) {
6+
entry:
7+
%W.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %W, i64 0
8+
%W.ripple.bcast.splat = shufflevector <8 x i32> %W.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
9+
%div1194 = lshr i32 %T, 3
10+
%cmp84.not = icmp ult i32 %T, 8
11+
br i1 %cmp84.not, label %for.end49, label %for.body.preheader
12+
13+
for.body.preheader: ; preds = %entry
14+
%div10195 = lshr i32 %W, 3
15+
%cmp1782.not = icmp ult i32 %W, 8
16+
%arrayidx27.ripple.LS.dim.slope = mul <8 x i32> %W.ripple.bcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
17+
%arrayidx27.ripple.LS.dim.slope.ripple.bcast = shufflevector <8 x i32> %arrayidx27.ripple.LS.dim.slope, <8 x i32> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
18+
%arrayidx27.ripple.LS.slope = add <64 x i32> %arrayidx27.ripple.LS.dim.slope.ripple.bcast, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19+
%invariant.gep196 = getelementptr i8, ptr %cptr, <64 x i32> %arrayidx27.ripple.LS.slope
20+
br label %for.body
21+
22+
for.body: ; preds = %for.end, %for.body.preheader
23+
%ripple.par.iv.085 = phi i32 [ %add48, %for.end ], [ 0, %for.body.preheader ]
24+
%mul2 = shl i32 %ripple.par.iv.085, 3
25+
br i1 %cmp1782.not, label %for.end, label %for.body18.lr.ph
26+
27+
for.body18.lr.ph: ; preds = %for.body
28+
%arrayidx = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2
29+
%mul25 = mul i32 %mul2, %W
30+
%gep197 = getelementptr i8, <64 x ptr> %invariant.gep196, i32 %mul25
31+
br label %for.body18
32+
33+
for.body18: ; preds = %for.body18, %for.body18.lr.ph
34+
%ripple.par.iv15.083 = phi i32 [ 0, %for.body18.lr.ph ], [ %add28, %for.body18 ]
35+
%mul19 = shl i32 %ripple.par.iv15.083, 3
36+
%.ripple.LS.instance184 = load <8 x i8>, ptr %arrayidx, align 1
37+
%.ripple.LS.instance184.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance184, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
38+
%arrayidx21 = getelementptr inbounds nuw i8, ptr %bptr, i32 %mul19
39+
%.ripple.LS.instance = load <8 x i8>, ptr %arrayidx21, align 1
40+
%.ripple.LS.instance.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance, <8 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
41+
%mul23.ripple.LS.instance = mul <64 x i8> %.ripple.LS.instance.ripple.bcast, %.ripple.LS.instance184.ripple.bcast
42+
%gep = getelementptr i8, <64 x ptr> %gep197, i32 %mul19
43+
tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul23.ripple.LS.instance, <64 x ptr> %gep, i32 1, <64 x i1> splat (i1 true))
44+
%add28 = add nuw i32 %ripple.par.iv15.083, 1
45+
%cmp17 = icmp ult i32 %add28, %div10195
46+
br i1 %cmp17, label %for.body18, label %for.end.loopexit
47+
48+
for.end.loopexit: ; preds = %for.body18
49+
%0 = shl i32 %add28, 3
50+
br label %for.end
51+
52+
for.end: ; preds = %for.end.loopexit, %for.body
53+
%ripple.par.iv15.0.lcssa = phi i32 [ 0, %for.body ], [ %0, %for.end.loopexit ]
54+
%add30.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %ripple.par.iv15.0.lcssa, i64 0
55+
%add30.ripple.bcast.splat = shufflevector <8 x i32> %add30.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
56+
%add30.ripple.LS.instance = or disjoint <8 x i32> %add30.ripple.bcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
57+
%cmp32.ripple.LS.instance = icmp ne i32 %ripple.par.iv15.0.lcssa, %W
58+
%cmp32.ripple.LS.instance.ripple.bcast.splatinsert = insertelement <8 x i1> poison, i1 %cmp32.ripple.LS.instance, i64 0
59+
%cmp32.ripple.LS.instance.ripple.bcast.splat = shufflevector <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splatinsert, <8 x i1> poison, <8 x i32> zeroinitializer
60+
%cmp33.ripple.vectorized = icmp ult <8 x i32> %add30.ripple.LS.instance, %W.ripple.bcast.splat
61+
%or.cond.ripple.LS.instance = select <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splat, <8 x i1> %cmp33.ripple.vectorized, <8 x i1> zeroinitializer
62+
%or.cond.ripple.LS.instance.ripple.bcast = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63+
%or.cond.ripple.LS.instance.ripple.reducelog2.shuffle = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 false>, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 15>
64+
%or.cond.ripple.LS.instance.ripple.reducelog2.operator = or <8 x i1> %or.cond.ripple.LS.instance, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle
65+
%or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, <8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false>, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 14, i32 15>
66+
%or.cond.ripple.LS.instance.ripple.reducelog2.operator190 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189
67+
%or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, <8 x i1> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
68+
%or.cond.ripple.LS.instance.ripple.reducelog2.operator192 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191
69+
%ripple.red.extract.ripple.bcast.splat = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator192, <8 x i1> poison, <8 x i32> zeroinitializer
70+
%arrayidx34.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2
71+
%.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx34.ripple.branch.clone, i32 1, <8 x i1> %ripple.red.extract.ripple.bcast.splat, <8 x i8> poison)
72+
%.ripple.LS.instance188.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
73+
%arrayidx36.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %bptr, i32 %ripple.par.iv15.0.lcssa
74+
%.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx36.ripple.branch.clone, i32 1, <8 x i1> %or.cond.ripple.LS.instance, <8 x i8> poison)
75+
%.ripple.LS.instance187.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
76+
%mul38.ripple.LS.instance.ripple.branch.clone = mul <64 x i8> %.ripple.LS.instance187.ripple.bcast.ripple.branch.clone, %.ripple.LS.instance188.ripple.bcast.ripple.branch.clone
77+
%mul40.ripple.branch.clone = mul i32 %mul2, %W
78+
%1 = getelementptr i8, ptr %cptr, i32 %mul40.ripple.branch.clone
79+
%arrayidx42.ripple.branch.clone = getelementptr i8, ptr %1, i32 %ripple.par.iv15.0.lcssa
80+
%arrayidx42.ripple.LS.instance.ripple.branch.clone = getelementptr i8, ptr %arrayidx42.ripple.branch.clone, <64 x i32> %arrayidx27.ripple.LS.slope
81+
tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul38.ripple.LS.instance.ripple.branch.clone, <64 x ptr> %arrayidx42.ripple.LS.instance.ripple.branch.clone, i32 1, <64 x i1> %or.cond.ripple.LS.instance.ripple.bcast)
82+
%add48 = add nuw i32 %ripple.par.iv.085, 1
83+
%cmp = icmp ult i32 %add48, %div1194
84+
br i1 %cmp, label %for.body, label %for.end49
85+
86+
for.end49: ; preds = %for.end, %entry
87+
ret void
88+
}
89+
90+
;; CHECK: outer_product
91+
;; CHECK: {{r[0-9]+}} = lsr({{r[0-9]+}},#3)
92+
;; CHECK: {{q[0-9]+}} = vand({{v[0-9]+}},{{r[0-9]+}})
93+
;; CHECK: {{v[0-9]+}} = vmux(q0,{{v[0-9]+}},{{v[0-9]+}})
94+
;; CHECK: vmem{{.*}} = {{v[0-9]+}}

0 commit comments

Comments
 (0)