Skip to content

Commit 511ad50

Browse files
author
Krzysztof Parzyszek
committed
[Hexagon] Rework VLCR algorithm
Add code to catch pattern for commutative instructions for VLCR. Patch by Suyog Sarda. llvm-svn: 364770
1 parent 1b31768 commit 511ad50

File tree

2 files changed

+243
-59
lines changed

2 files changed

+243
-59
lines changed

llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp

Lines changed: 161 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,17 @@ namespace {
238238
// used over the backedge. This is the value that gets reused from a
239239
// previous iteration.
240240
Instruction *BackedgeInst = nullptr;
241+
std::map<Instruction *, DepChain *> DepChains;
242+
int Iterations = -1;
241243

242244
ReuseValue() = default;
243245

244-
void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
246+
// Return this candidate to its default, undefined state: no dependence
// chains recorded, iteration count back at the -1 sentinel, and both
// instruction pointers null (so isDefined() reports false).
void reset() {
  Iterations = -1;
  DepChains.clear();
  BackedgeInst = nullptr;
  Inst2Replace = nullptr;
}
245252
bool isDefined() { return Inst2Replace != nullptr; }
246253
};
247254

@@ -288,10 +295,10 @@ namespace {
288295
void findDepChainFromPHI(Instruction *I, DepChain &D);
289296
void reuseValue();
290297
Value *findValueInBlock(Value *Op, BasicBlock *BB);
291-
bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
292-
DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
298+
DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
293299
bool isEquivalentOperation(Instruction *I1, Instruction *I2);
294300
bool canReplace(Instruction *I);
301+
bool isCallInstCommutative(CallInst *C);
295302
};
296303

297304
} // end anonymous namespace
@@ -326,6 +333,70 @@ bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
326333
return doVLCR();
327334
}
328335

336+
/// Return true if \p C is a direct call to one of the Hexagon HVX intrinsics
/// that this pass treats as commutative, i.e. whose operands may be matched
/// against the backedge user's operands in any order.
///
/// \param C the call instruction to classify; must not be null.
/// \returns true for the intrinsics listed below (64-byte and 128-byte
///          variants); false for any other callee, including indirect calls.
bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
  // getCalledFunction() yields null when the call is indirect. Such a call
  // can never be one of the intrinsics below, so reject it instead of
  // dereferencing a null pointer.
  const Function *Callee = C->getCalledFunction();
  if (!Callee)
    return false;

  switch (Callee->getIntrinsicID()) {
  case Intrinsic::hexagon_V6_vaddb:
  case Intrinsic::hexagon_V6_vaddb_128B:
  case Intrinsic::hexagon_V6_vaddh:
  case Intrinsic::hexagon_V6_vaddh_128B:
  case Intrinsic::hexagon_V6_vaddw:
  case Intrinsic::hexagon_V6_vaddw_128B:
  case Intrinsic::hexagon_V6_vaddubh:
  case Intrinsic::hexagon_V6_vaddubh_128B:
  case Intrinsic::hexagon_V6_vadduhw:
  case Intrinsic::hexagon_V6_vadduhw_128B:
  case Intrinsic::hexagon_V6_vaddhw:
  case Intrinsic::hexagon_V6_vaddhw_128B:
  case Intrinsic::hexagon_V6_vmaxb:
  case Intrinsic::hexagon_V6_vmaxb_128B:
  case Intrinsic::hexagon_V6_vmaxh:
  case Intrinsic::hexagon_V6_vmaxh_128B:
  case Intrinsic::hexagon_V6_vmaxw:
  case Intrinsic::hexagon_V6_vmaxw_128B:
  case Intrinsic::hexagon_V6_vmaxub:
  case Intrinsic::hexagon_V6_vmaxub_128B:
  case Intrinsic::hexagon_V6_vmaxuh:
  case Intrinsic::hexagon_V6_vmaxuh_128B:
  case Intrinsic::hexagon_V6_vminub:
  case Intrinsic::hexagon_V6_vminub_128B:
  case Intrinsic::hexagon_V6_vminuh:
  case Intrinsic::hexagon_V6_vminuh_128B:
  case Intrinsic::hexagon_V6_vminb:
  case Intrinsic::hexagon_V6_vminb_128B:
  case Intrinsic::hexagon_V6_vminh:
  case Intrinsic::hexagon_V6_vminh_128B:
  case Intrinsic::hexagon_V6_vminw:
  case Intrinsic::hexagon_V6_vminw_128B:
  case Intrinsic::hexagon_V6_vmpyub:
  case Intrinsic::hexagon_V6_vmpyub_128B:
  case Intrinsic::hexagon_V6_vmpyuh:
  case Intrinsic::hexagon_V6_vmpyuh_128B:
  case Intrinsic::hexagon_V6_vavgub:
  case Intrinsic::hexagon_V6_vavgub_128B:
  case Intrinsic::hexagon_V6_vavgh:
  case Intrinsic::hexagon_V6_vavgh_128B:
  case Intrinsic::hexagon_V6_vavguh:
  case Intrinsic::hexagon_V6_vavguh_128B:
  case Intrinsic::hexagon_V6_vavgw:
  case Intrinsic::hexagon_V6_vavgw_128B:
  case Intrinsic::hexagon_V6_vavgb:
  case Intrinsic::hexagon_V6_vavgb_128B:
  case Intrinsic::hexagon_V6_vavguw:
  case Intrinsic::hexagon_V6_vavguw_128B:
  case Intrinsic::hexagon_V6_vabsdiffh:
  case Intrinsic::hexagon_V6_vabsdiffh_128B:
  case Intrinsic::hexagon_V6_vabsdiffub:
  case Intrinsic::hexagon_V6_vabsdiffub_128B:
  case Intrinsic::hexagon_V6_vabsdiffuh:
  case Intrinsic::hexagon_V6_vabsdiffuh_128B:
  case Intrinsic::hexagon_V6_vabsdiffw:
  case Intrinsic::hexagon_V6_vabsdiffw_128B:
    return true;
  default:
    // Covers every other intrinsic as well as ordinary (non-intrinsic)
    // functions.
    return false;
  }
}
399+
329400
bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
330401
Instruction *I2) {
331402
if (!I1->isSameOperationAs(I2))
@@ -360,13 +431,19 @@ bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
360431

361432
bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
362433
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
363-
if (II &&
364-
(II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
365-
II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
434+
if (!II)
435+
return true;
436+
437+
switch (II->getIntrinsicID()) {
438+
case Intrinsic::hexagon_V6_hi:
439+
case Intrinsic::hexagon_V6_lo:
440+
case Intrinsic::hexagon_V6_hi_128B:
441+
case Intrinsic::hexagon_V6_lo_128B:
366442
LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
367443
return false;
444+
default:
445+
return true;
368446
}
369-
return true;
370447
}
371448
void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372449
for (auto *D : Dependences) {
@@ -427,34 +504,85 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
427504

428505
int NumOperands = I->getNumOperands();
429506

430-
for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
431-
Value *Op = I->getOperand(OpNo);
432-
Value *BEOp = BEUser->getOperand(OpNo);
433-
434-
Instruction *OpInst = dyn_cast<Instruction>(Op);
435-
if (!OpInst) {
436-
if (Op == BEOp)
437-
continue;
438-
// Do not allow reuse to occur when the operands may be different
439-
// values.
440-
BEUser = nullptr;
441-
break;
507+
// Take operands of each PNUser one by one and try to find DepChain
508+
// with every operand of the BEUser. If any of the operands of BEUser
509+
// has DepChain with current operand of the PNUser, break the matcher
510+
// loop. Keep doing this for every PNUser operand. If a PNUser operand
511+
// does not have a DepChain with any of the BEUser operands, break the
512+
// outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
513+
// This ensures that a DepChain exists for every PNUser operand with
514+
// BEUser operand. This also ensures that DepChains are independent of
515+
// the positions in PNUser and BEUser.
516+
std::map<Instruction *, DepChain *> DepChains;
517+
CallInst *C1 = dyn_cast<CallInst>(I);
518+
if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
519+
bool Found = false;
520+
for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
521+
Value *Op = I->getOperand(OpNo);
522+
Instruction *OpInst = dyn_cast<Instruction>(Op);
523+
Found = false;
524+
for (int T = 0; T < NumOperands; ++T) {
525+
Value *BEOp = BEUser->getOperand(T);
526+
Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
527+
if (!OpInst && !BEOpInst) {
528+
if (Op == BEOp) {
529+
Found = true;
530+
break;
531+
}
532+
}
533+
534+
if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
535+
continue;
536+
537+
DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
538+
539+
if (D) {
540+
Found = true;
541+
DepChains[OpInst] = D;
542+
break;
543+
}
544+
}
545+
if (!Found) {
546+
BEUser = nullptr;
547+
break;
548+
}
442549
}
443-
444-
Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
445-
446-
if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
447-
BEUser = nullptr;
448-
break;
550+
} else {
551+
552+
for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
553+
Value *Op = I->getOperand(OpNo);
554+
Value *BEOp = BEUser->getOperand(OpNo);
555+
556+
Instruction *OpInst = dyn_cast<Instruction>(Op);
557+
if (!OpInst) {
558+
if (Op == BEOp)
559+
continue;
560+
// Do not allow reuse to occur when the operands may be different
561+
// values.
562+
BEUser = nullptr;
563+
break;
564+
}
565+
566+
Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
567+
DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
568+
569+
if (D) {
570+
DepChains[OpInst] = D;
571+
} else {
572+
BEUser = nullptr;
573+
break;
574+
}
449575
}
450576
}
451577
if (BEUser) {
452578
LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
453579
ReuseCandidate.Inst2Replace = I;
454580
ReuseCandidate.BackedgeInst = BEUser;
581+
ReuseCandidate.DepChains = DepChains;
582+
ReuseCandidate.Iterations = Iters;
455583
return;
456-
} else
457-
ReuseCandidate.reset();
584+
}
585+
ReuseCandidate.reset();
458586
}
459587
}
460588
}
@@ -474,27 +602,10 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
474602
Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
475603
Instruction *BEInst = ReuseCandidate.BackedgeInst;
476604
int NumOperands = Inst2Replace->getNumOperands();
477-
std::map<Instruction *, DepChain *> DepChains;
478-
int Iterations = -1;
605+
std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
606+
int Iterations = ReuseCandidate.Iterations;
479607
BasicBlock *LoopPH = CurLoop->getLoopPreheader();
480-
481-
for (int i = 0; i < NumOperands; ++i) {
482-
Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
483-
if(!I)
484-
continue;
485-
else {
486-
Instruction *J = cast<Instruction>(BEInst->getOperand(i));
487-
DepChain *D = getDepChainBtwn(I, J);
488-
489-
assert(D &&
490-
"No DepChain between corresponding operands in ReuseCandidate\n");
491-
if (Iterations == -1)
492-
Iterations = D->iterations();
493-
assert(Iterations == D->iterations() && "Iterations mismatch");
494-
DepChains[I] = D;
495-
}
496-
}
497-
608+
assert(!DepChains.empty() && "No DepChains");
498609
LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
499610

500611
SmallVector<Instruction *, 4> InstsInPreheader;
@@ -603,20 +714,11 @@ void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
603714
}
604715
}
605716

606-
bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
607-
Instruction *I2,
608-
int Iters) {
609-
for (auto *D : Dependences) {
610-
if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
611-
return true;
612-
}
613-
return false;
614-
}
615-
616717
DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
617-
Instruction *I2) {
718+
Instruction *I2,
719+
int Iters) {
618720
for (auto *D : Dependences) {
619-
if (D->front() == I1 && D->back() == I2)
721+
if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
620722
return D;
621723
}
622724
return nullptr;
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; RUN: opt -march=hexagon < %s -hexagon-vlcr -adce -S | FileCheck %s
2+
3+
; CHECK: %v32.hexagon.vlcr = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B
4+
5+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
6+
target triple = "hexagon"
7+
8+
@g0 = external local_unnamed_addr global i32, align 4
9+
10+
; Function Attrs: nounwind
11+
define void @f0(i8* noalias nocapture readonly %a0, i8* noalias nocapture %a1, i32 %a2) local_unnamed_addr #0 {
12+
b0:
13+
%v0 = getelementptr inbounds i8, i8* %a0, i32 %a2
14+
%v1 = mul nsw i32 %a2, 2
15+
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
16+
%v3 = load i32, i32* @g0, align 4, !tbaa !0
17+
%v4 = icmp sgt i32 %v3, 0
18+
br i1 %v4, label %b1, label %b4
19+
20+
b1: ; preds = %b0
21+
%v5 = bitcast i8* %v2 to <32 x i32>*
22+
%v6 = load <32 x i32>, <32 x i32>* %v5, align 128, !tbaa !4
23+
%v7 = getelementptr inbounds i8, i8* %v2, i32 128
24+
%v8 = bitcast i8* %v7 to <32 x i32>*
25+
%v9 = bitcast i8* %v0 to <32 x i32>*
26+
%v10 = load <32 x i32>, <32 x i32>* %v9, align 128, !tbaa !4
27+
%v11 = getelementptr inbounds i8, i8* %v0, i32 128
28+
%v12 = bitcast i8* %v11 to <32 x i32>*
29+
%v13 = bitcast i8* %a0 to <32 x i32>*
30+
%v14 = load <32 x i32>, <32 x i32>* %v13, align 128, !tbaa !4
31+
%v15 = getelementptr inbounds i8, i8* %a0, i32 128
32+
%v16 = bitcast i8* %v15 to <32 x i32>*
33+
%v17 = bitcast i8* %a1 to <32 x i32>*
34+
br label %b2
35+
36+
b2: ; preds = %b2, %b1
37+
%v18 = phi <32 x i32>* [ %v17, %b1 ], [ %v37, %b2 ]
38+
%v19 = phi <32 x i32>* [ %v8, %b1 ], [ %v30, %b2 ]
39+
%v20 = phi <32 x i32>* [ %v12, %b1 ], [ %v28, %b2 ]
40+
%v21 = phi <32 x i32>* [ %v16, %b1 ], [ %v26, %b2 ]
41+
%v22 = phi i32 [ 0, %b1 ], [ %v38, %b2 ]
42+
%v23 = phi <32 x i32> [ %v14, %b1 ], [ %v27, %b2 ]
43+
%v24 = phi <32 x i32> [ %v10, %b1 ], [ %v29, %b2 ]
44+
%v25 = phi <32 x i32> [ %v6, %b1 ], [ %v31, %b2 ]
45+
%v26 = getelementptr inbounds <32 x i32>, <32 x i32>* %v21, i32 1
46+
%v27 = load <32 x i32>, <32 x i32>* %v21, align 128, !tbaa !4
47+
%v28 = getelementptr inbounds <32 x i32>, <32 x i32>* %v20, i32 1
48+
%v29 = load <32 x i32>, <32 x i32>* %v20, align 128, !tbaa !4
49+
%v30 = getelementptr inbounds <32 x i32>, <32 x i32>* %v19, i32 1
50+
%v31 = load <32 x i32>, <32 x i32>* %v19, align 128, !tbaa !4
51+
%v32 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v23, <32 x i32> %v24)
52+
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v32, <32 x i32> %v25)
53+
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v29, <32 x i32> %v27)
54+
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v34, <32 x i32> %v31)
55+
%v36 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %v35, <32 x i32> %v33, i32 1)
56+
%v37 = getelementptr inbounds <32 x i32>, <32 x i32>* %v18, i32 1
57+
store <32 x i32> %v36, <32 x i32>* %v18, align 128, !tbaa !4
58+
%v38 = add nuw nsw i32 %v22, 128
59+
%v39 = icmp slt i32 %v38, %v3
60+
br i1 %v39, label %b2, label %b3
61+
62+
b3: ; preds = %b2
63+
br label %b4
64+
65+
b4: ; preds = %b3, %b0
66+
ret void
67+
}
68+
69+
; Function Attrs: nounwind readnone
70+
declare <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32>, <32 x i32>) #1
71+
72+
; Function Attrs: nounwind readnone
73+
declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #1
74+
75+
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b,-long-calls" }
76+
attributes #1 = { nounwind readnone }
77+
78+
!0 = !{!1, !1, i64 0}
79+
!1 = !{!"int", !2, i64 0}
80+
!2 = !{!"omnipotent char", !3, i64 0}
81+
!3 = !{!"Simple C/C++ TBAA"}
82+
!4 = !{!2, !2, i64 0}

0 commit comments

Comments
 (0)