|
18 | 18 | #include "AMDGPUTargetTransformInfo.h"
|
19 | 19 | #include "GCNSubtarget.h"
|
20 | 20 | #include "llvm/ADT/FloatingPointMode.h"
|
| 21 | +#include "llvm/IR/Dominators.h"
21 | 22 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
22 | 23 | #include "llvm/Transforms/InstCombine/InstCombiner.h"
|
23 | 24 | #include <optional>
|
@@ -481,6 +482,98 @@ bool GCNTTIImpl::simplifyDemandedLaneMaskArg(InstCombiner &IC,
|
481 | 482 |   return false;
|
482 | 483 | }
|
483 | 484 |
|
| 485 | +static CallInst *rewriteCall(IRBuilderBase &B, CallInst &Old,
| 486 | +                             Function &NewCallee, ArrayRef<Value *> Ops) {
| 487 | +  SmallVector<OperandBundleDef, 2> OpBundles;
| 488 | +  Old.getOperandBundlesAsDefs(OpBundles);
| 489 | +
| 490 | +  CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
| 491 | +  NewCall->takeName(&Old);
| 492 | +  return NewCall;
| 493 | +}
| 494 | +
| 495 | +Instruction *
| 496 | +GCNTTIImpl::hoistLaneIntrinsicThroughOperand(InstCombiner &IC,
| 497 | +                                             IntrinsicInst &II) const {
| 498 | +  const auto IID = II.getIntrinsicID();
| 499 | +  assert(IID == Intrinsic::amdgcn_readlane ||
| 500 | +         IID == Intrinsic::amdgcn_readfirstlane ||
| 501 | +         IID == Intrinsic::amdgcn_permlane64);
| 502 | +
| 503 | +  Instruction *OpInst = dyn_cast<Instruction>(II.getOperand(0));
| 504 | +
| 505 | +  // Only do this if both instructions are in the same block
| 506 | +  // (so the exec mask won't change) and the lane intrinsic is the only user
| 507 | +  // of its operand.
| 508 | +  if (!OpInst || !OpInst->hasOneUser() || OpInst->getParent() != II.getParent())
| 509 | +    return nullptr;
| 510 | +
| 511 | +  const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
| 512 | +
| 513 | +  // If this is a readlane, check that the second operand is a constant, or is
| 514 | +  // defined before OpInst so we know it's safe to move this intrinsic higher.
| 515 | +  Value *LaneID = nullptr;
| 516 | +  if (IsReadLane) {
| 517 | +    LaneID = II.getOperand(1);
| 518 | +
| 519 | +    // readlane takes an extra operand for the lane ID, so we must check that
| 520 | +    // the LaneID value can be used at the point where we want to move the
| 521 | +    // intrinsic.
| 522 | +    if (auto *LaneIDInst = dyn_cast<Instruction>(LaneID)) {
| 523 | +      if (!IC.getDominatorTree().dominates(LaneIDInst, OpInst))
| 524 | +        return nullptr;
| 525 | +    }
| 526 | +  }
| 527 | +
| 528 | +  // Hoist the intrinsic (II) through OpInst.
| 529 | +  //
| 530 | +  //   (II (OpInst x)) -> (OpInst (II x))
| 531 | +  const auto DoIt = [&](unsigned OpIdx,
| 532 | +                        Function *NewIntrinsic) -> Instruction * {
| 533 | +    SmallVector<Value *, 2> Ops{OpInst->getOperand(OpIdx)};
| 534 | +    if (IsReadLane)
| 535 | +      Ops.push_back(LaneID);
| 536 | +
| 537 | +    // Rewrite the intrinsic call.
| 538 | +    CallInst *NewII = rewriteCall(IC.Builder, II, *NewIntrinsic, Ops);
| 539 | +
| 540 | +    // Rewrite OpInst so it takes the result of the intrinsic now.
| 541 | +    Instruction &NewOp = *OpInst->clone();
| 542 | +    NewOp.setOperand(OpIdx, NewII);
| 543 | +    return &NewOp;
| 544 | +  };
| 545 | +
| 546 | +  // TODO(?): Should we do more with permlane64?
| 547 | +  if (IID == Intrinsic::amdgcn_permlane64 && !isa<BitCastInst>(OpInst))
| 548 | +    return nullptr;
| 549 | +
| 550 | +  if (isa<UnaryOperator>(OpInst))
| 551 | +    return DoIt(0, II.getCalledFunction());
| 552 | +
| 553 | +  if (isa<CastInst>(OpInst)) {
| 554 | +    Value *Src = OpInst->getOperand(0);
| 555 | +    Type *SrcTy = Src->getType();
| 556 | +    if (!isTypeLegal(SrcTy))
| 557 | +      return nullptr;
| 558 | +
| 559 | +    Function *Remangled =
| 560 | +        Intrinsic::getOrInsertDeclaration(II.getModule(), IID, {SrcTy});
| 561 | +    return DoIt(0, Remangled);
| 562 | +  }
| 563 | +
| 564 | +  // We can also hoist through binary operators if the other operand is uniform.
| 565 | +  if (isa<BinaryOperator>(OpInst)) {
| 566 | +    // FIXME: If we had access to UniformityInfo here we could just check
| 567 | +    // if the operand is uniform.
| 568 | +    if (isTriviallyUniform(OpInst->getOperandUse(0)))
| 569 | +      return DoIt(1, II.getCalledFunction());
| 570 | +    if (isTriviallyUniform(OpInst->getOperandUse(1)))
| 571 | +      return DoIt(0, II.getCalledFunction());
| 572 | +  }
| 573 | +
| 574 | +  return nullptr;
| 575 | +}
| 576 | +
484 | 577 | std::optional<Instruction *>
|
485 | 578 | GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
486 | 579 |   Intrinsic::ID IID = II.getIntrinsicID();
|
@@ -1151,6 +1244,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
1151 | 1244 |         simplifyDemandedLaneMaskArg(IC, II, 1))

1152 | 1245 |       return &II;
|
1153 | 1246 |
|
| 1247 | +    if (IID != Intrinsic::amdgcn_ds_bpermute) {
| 1248 | +      if (Instruction *Res = hoistLaneIntrinsicThroughOperand(IC, II))
| 1249 | +        return Res;
| 1250 | +    }
| 1251 | +
1154 | 1252 |     return std::nullopt;

1155 | 1253 |   }

1156 | 1254 |   case Intrinsic::amdgcn_writelane: {
|
|
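For illustration (an editor's sketch, not part of the patch): the rewrite this adds, shown on hypothetical LLVM IR. Value names are assumptions; the cast case also requires the cast's source type to be legal for the subtarget (e.g. i16 on targets with 16-bit instructions).

Cast case, (readfirstlane (zext x)) -> (zext (readfirstlane x)), with the intrinsic remangled to the cast's source type:

    ; before
    %ext = zext i16 %x to i32
    %r = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %ext)

    ; after
    %rfl = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %x)
    %r = zext i16 %rfl to i32

Binary-operator case, when one operand is trivially uniform (here the constant 42), only the other operand goes through the intrinsic:

    ; before
    %add = add i32 %v, 42
    %r = call i32 @llvm.amdgcn.readlane.i32(i32 %add, i32 %lane)

    ; after (only done if %lane is defined before %add; see the dominance check)
    %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %v, i32 %lane)
    %r = add i32 %rl, 42

The likely payoff: after the hoist, the arithmetic consumes the uniform result of the lane intrinsic, so it can typically be selected to scalar (SALU) instructions rather than VALU ones.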