@@ -238,10 +238,17 @@ namespace {
238
238
// used over the backedge. This is the value that gets reused from a
239
239
// previous iteration.
240
240
Instruction *BackedgeInst = nullptr ;
241
+ std::map<Instruction *, DepChain *> DepChains;
242
+ int Iterations = -1 ;
241
243
242
244
ReuseValue () = default ;
243
245
244
- void reset () { Inst2Replace = nullptr ; BackedgeInst = nullptr ; }
246
+ void reset () {
247
+ Inst2Replace = nullptr ;
248
+ BackedgeInst = nullptr ;
249
+ DepChains.clear ();
250
+ Iterations = -1 ;
251
+ }
245
252
bool isDefined () { return Inst2Replace != nullptr ; }
246
253
};
247
254
@@ -288,10 +295,10 @@ namespace {
288
295
void findDepChainFromPHI (Instruction *I, DepChain &D);
289
296
void reuseValue ();
290
297
Value *findValueInBlock (Value *Op, BasicBlock *BB);
291
- bool isDepChainBtwn (Instruction *I1, Instruction *I2, int Iters);
292
- DepChain *getDepChainBtwn (Instruction *I1, Instruction *I2);
298
+ DepChain *getDepChainBtwn (Instruction *I1, Instruction *I2, int Iters);
293
299
bool isEquivalentOperation (Instruction *I1, Instruction *I2);
294
300
bool canReplace (Instruction *I);
301
+ bool isCallInstCommutative (CallInst *C);
295
302
};
296
303
297
304
} // end anonymous namespace
@@ -326,6 +333,70 @@ bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
326
333
return doVLCR ();
327
334
}
328
335
336
+ bool HexagonVectorLoopCarriedReuse::isCallInstCommutative (CallInst *C) {
337
+ switch (C->getCalledFunction ()->getIntrinsicID ()) {
338
+ case Intrinsic::hexagon_V6_vaddb:
339
+ case Intrinsic::hexagon_V6_vaddb_128B:
340
+ case Intrinsic::hexagon_V6_vaddh:
341
+ case Intrinsic::hexagon_V6_vaddh_128B:
342
+ case Intrinsic::hexagon_V6_vaddw:
343
+ case Intrinsic::hexagon_V6_vaddw_128B:
344
+ case Intrinsic::hexagon_V6_vaddubh:
345
+ case Intrinsic::hexagon_V6_vaddubh_128B:
346
+ case Intrinsic::hexagon_V6_vadduhw:
347
+ case Intrinsic::hexagon_V6_vadduhw_128B:
348
+ case Intrinsic::hexagon_V6_vaddhw:
349
+ case Intrinsic::hexagon_V6_vaddhw_128B:
350
+ case Intrinsic::hexagon_V6_vmaxb:
351
+ case Intrinsic::hexagon_V6_vmaxb_128B:
352
+ case Intrinsic::hexagon_V6_vmaxh:
353
+ case Intrinsic::hexagon_V6_vmaxh_128B:
354
+ case Intrinsic::hexagon_V6_vmaxw:
355
+ case Intrinsic::hexagon_V6_vmaxw_128B:
356
+ case Intrinsic::hexagon_V6_vmaxub:
357
+ case Intrinsic::hexagon_V6_vmaxub_128B:
358
+ case Intrinsic::hexagon_V6_vmaxuh:
359
+ case Intrinsic::hexagon_V6_vmaxuh_128B:
360
+ case Intrinsic::hexagon_V6_vminub:
361
+ case Intrinsic::hexagon_V6_vminub_128B:
362
+ case Intrinsic::hexagon_V6_vminuh:
363
+ case Intrinsic::hexagon_V6_vminuh_128B:
364
+ case Intrinsic::hexagon_V6_vminb:
365
+ case Intrinsic::hexagon_V6_vminb_128B:
366
+ case Intrinsic::hexagon_V6_vminh:
367
+ case Intrinsic::hexagon_V6_vminh_128B:
368
+ case Intrinsic::hexagon_V6_vminw:
369
+ case Intrinsic::hexagon_V6_vminw_128B:
370
+ case Intrinsic::hexagon_V6_vmpyub:
371
+ case Intrinsic::hexagon_V6_vmpyub_128B:
372
+ case Intrinsic::hexagon_V6_vmpyuh:
373
+ case Intrinsic::hexagon_V6_vmpyuh_128B:
374
+ case Intrinsic::hexagon_V6_vavgub:
375
+ case Intrinsic::hexagon_V6_vavgub_128B:
376
+ case Intrinsic::hexagon_V6_vavgh:
377
+ case Intrinsic::hexagon_V6_vavgh_128B:
378
+ case Intrinsic::hexagon_V6_vavguh:
379
+ case Intrinsic::hexagon_V6_vavguh_128B:
380
+ case Intrinsic::hexagon_V6_vavgw:
381
+ case Intrinsic::hexagon_V6_vavgw_128B:
382
+ case Intrinsic::hexagon_V6_vavgb:
383
+ case Intrinsic::hexagon_V6_vavgb_128B:
384
+ case Intrinsic::hexagon_V6_vavguw:
385
+ case Intrinsic::hexagon_V6_vavguw_128B:
386
+ case Intrinsic::hexagon_V6_vabsdiffh:
387
+ case Intrinsic::hexagon_V6_vabsdiffh_128B:
388
+ case Intrinsic::hexagon_V6_vabsdiffub:
389
+ case Intrinsic::hexagon_V6_vabsdiffub_128B:
390
+ case Intrinsic::hexagon_V6_vabsdiffuh:
391
+ case Intrinsic::hexagon_V6_vabsdiffuh_128B:
392
+ case Intrinsic::hexagon_V6_vabsdiffw:
393
+ case Intrinsic::hexagon_V6_vabsdiffw_128B:
394
+ return true ;
395
+ default :
396
+ return false ;
397
+ }
398
+ }
399
+
329
400
bool HexagonVectorLoopCarriedReuse::isEquivalentOperation (Instruction *I1,
330
401
Instruction *I2) {
331
402
if (!I1->isSameOperationAs (I2))
@@ -360,13 +431,19 @@ bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
360
431
361
432
/// Decide whether \p I may be considered for reuse.
///
/// Instructions that are not intrinsic calls are always accepted. Among
/// intrinsics, only hexagon_V6_hi / hexagon_V6_lo and their _128B forms are
/// rejected, and a debug message is printed for each rejection.
bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
  if (const auto *Intrin = dyn_cast<IntrinsicInst>(I)) {
    const auto ID = Intrin->getIntrinsicID();
    const bool Rejected = ID == Intrinsic::hexagon_V6_hi ||
                          ID == Intrinsic::hexagon_V6_lo ||
                          ID == Intrinsic::hexagon_V6_hi_128B ||
                          ID == Intrinsic::hexagon_V6_lo_128B;
    if (Rejected) {
      LLVM_DEBUG(dbgs() << " Not considering for reuse: " << *Intrin
                        << " \n ");
      return false;
    }
  }
  // Everything else, including all non-intrinsic instructions, is eligible.
  return true;
}
371
448
void HexagonVectorLoopCarriedReuse::findValueToReuse () {
372
449
for (auto *D : Dependences) {
@@ -427,34 +504,85 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
427
504
428
505
int NumOperands = I->getNumOperands ();
429
506
430
- for (int OpNo = 0 ; OpNo < NumOperands; ++OpNo) {
431
- Value *Op = I->getOperand (OpNo);
432
- Value *BEOp = BEUser->getOperand (OpNo);
433
-
434
- Instruction *OpInst = dyn_cast<Instruction>(Op);
435
- if (!OpInst) {
436
- if (Op == BEOp)
437
- continue ;
438
- // Do not allow reuse to occur when the operands may be different
439
- // values.
440
- BEUser = nullptr ;
441
- break ;
507
+ // Take the operands of each PNUser one by one and try to find a DepChain
508
+ // with every operand of the BEUser. If any of the operands of the BEUser
509
+ // has a DepChain with the current operand of the PNUser, break the matcher
510
+ // loop. Keep doing this for every PNUser operand. If a PNUser operand
511
+ // does not have a DepChain with any of the BEUser operands, break the
512
+ // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
513
+ // This ensures that a DepChain exists for every PNUser operand with some
514
+ // BEUser operand. This also ensures that DepChains are independent of
515
+ // the operand positions in PNUser and BEUser.
516
+ std::map<Instruction *, DepChain *> DepChains;
517
+ CallInst *C1 = dyn_cast<CallInst>(I);
518
+ if ((I && I->isCommutative ()) || (C1 && isCallInstCommutative (C1))) {
519
+ bool Found = false ;
520
+ for (int OpNo = 0 ; OpNo < NumOperands; ++OpNo) {
521
+ Value *Op = I->getOperand (OpNo);
522
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
523
+ Found = false ;
524
+ for (int T = 0 ; T < NumOperands; ++T) {
525
+ Value *BEOp = BEUser->getOperand (T);
526
+ Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
527
+ if (!OpInst && !BEOpInst) {
528
+ if (Op == BEOp) {
529
+ Found = true ;
530
+ break ;
531
+ }
532
+ }
533
+
534
+ if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
535
+ continue ;
536
+
537
+ DepChain *D = getDepChainBtwn (OpInst, BEOpInst, Iters);
538
+
539
+ if (D) {
540
+ Found = true ;
541
+ DepChains[OpInst] = D;
542
+ break ;
543
+ }
544
+ }
545
+ if (!Found) {
546
+ BEUser = nullptr ;
547
+ break ;
548
+ }
442
549
}
443
-
444
- Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
445
-
446
- if (!isDepChainBtwn (OpInst, BEOpInst, Iters)) {
447
- BEUser = nullptr ;
448
- break ;
550
+ } else {
551
+
552
+ for (int OpNo = 0 ; OpNo < NumOperands; ++OpNo) {
553
+ Value *Op = I->getOperand (OpNo);
554
+ Value *BEOp = BEUser->getOperand (OpNo);
555
+
556
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
557
+ if (!OpInst) {
558
+ if (Op == BEOp)
559
+ continue ;
560
+ // Do not allow reuse to occur when the operands may be different
561
+ // values.
562
+ BEUser = nullptr ;
563
+ break ;
564
+ }
565
+
566
+ Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
567
+ DepChain *D = getDepChainBtwn (OpInst, BEOpInst, Iters);
568
+
569
+ if (D) {
570
+ DepChains[OpInst] = D;
571
+ } else {
572
+ BEUser = nullptr ;
573
+ break ;
574
+ }
449
575
}
450
576
}
451
577
if (BEUser) {
452
578
LLVM_DEBUG (dbgs () << " Found Value for reuse.\n " );
453
579
ReuseCandidate.Inst2Replace = I;
454
580
ReuseCandidate.BackedgeInst = BEUser;
581
+ ReuseCandidate.DepChains = DepChains;
582
+ ReuseCandidate.Iterations = Iters;
455
583
return ;
456
- } else
457
- ReuseCandidate.reset ();
584
+ }
585
+ ReuseCandidate.reset ();
458
586
}
459
587
}
460
588
}
@@ -474,27 +602,10 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
474
602
Instruction *Inst2Replace = ReuseCandidate.Inst2Replace ;
475
603
Instruction *BEInst = ReuseCandidate.BackedgeInst ;
476
604
int NumOperands = Inst2Replace->getNumOperands ();
477
- std::map<Instruction *, DepChain *> DepChains;
478
- int Iterations = - 1 ;
605
+ std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate. DepChains ;
606
+ int Iterations = ReuseCandidate. Iterations ;
479
607
BasicBlock *LoopPH = CurLoop->getLoopPreheader ();
480
-
481
- for (int i = 0 ; i < NumOperands; ++i) {
482
- Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand (i));
483
- if (!I)
484
- continue ;
485
- else {
486
- Instruction *J = cast<Instruction>(BEInst->getOperand (i));
487
- DepChain *D = getDepChainBtwn (I, J);
488
-
489
- assert (D &&
490
- " No DepChain between corresponding operands in ReuseCandidate\n " );
491
- if (Iterations == -1 )
492
- Iterations = D->iterations ();
493
- assert (Iterations == D->iterations () && " Iterations mismatch" );
494
- DepChains[I] = D;
495
- }
496
- }
497
-
608
+ assert (!DepChains.empty () && " No DepChains" );
498
609
LLVM_DEBUG (dbgs () << " reuseValue is making the following changes\n " );
499
610
500
611
SmallVector<Instruction *, 4 > InstsInPreheader;
@@ -603,20 +714,11 @@ void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
603
714
}
604
715
}
605
716
606
- bool HexagonVectorLoopCarriedReuse::isDepChainBtwn (Instruction *I1,
607
- Instruction *I2,
608
- int Iters) {
609
- for (auto *D : Dependences) {
610
- if (D->front () == I1 && D->back () == I2 && D->iterations () == Iters)
611
- return true ;
612
- }
613
- return false ;
614
- }
615
-
616
717
DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn (Instruction *I1,
617
- Instruction *I2) {
718
+ Instruction *I2,
719
+ int Iters) {
618
720
for (auto *D : Dependences) {
619
- if (D->front () == I1 && D->back () == I2)
721
+ if (D->front () == I1 && D->back () == I2 && D-> iterations () == Iters )
620
722
return D;
621
723
}
622
724
return nullptr ;
0 commit comments