Skip to content

Commit 077f424

Browse files
committed
[AMDGPU] Hoist readlane/readfirst through unary/binary operands
When a read(first)lane is used on a binary operator and the intrinsic is the only user of that operator, we can move the read(first)lane onto one operand if the other operand is uniform. Unfortunately, InstCombine (IC) doesn't give us access to UniformityAnalysis, so we can't truly check uniformity; we have to make do with a basic uniformity check which only allows constants or trivially uniform intrinsic calls. We can also do the same for simple unary operations.
1 parent f09e245 commit 077f424

File tree

4 files changed

+666
-0
lines changed

4 files changed

+666
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,59 @@ bool GCNTTIImpl::simplifyDemandedLaneMaskArg(InstCombiner &IC,
481481
return false;
482482
}
483483

484+
/// Try to hoist a readlane/readfirstlane intrinsic through the unary or binary
/// operator that produces its first operand:
///
///   (II (Op x))    -> (Op (II x))
///   (II (Op x, u)) -> (Op (II x), u)   when `u` is trivially uniform
///
/// \param IC InstCombiner whose builder is used to create the new intrinsic
///           call and whose dominator tree is queried for the lane ID check.
/// \param II The intrinsic call to hoist. Operand 0 is the value being read;
///           for amdgcn_readlane, operand 1 is the lane ID.
/// \returns A clone of the operator with the chosen operand replaced by the
///          new intrinsic call, or nullptr if the transform does not apply.
///          The clone is not inserted here; the caller hands it back to
///          InstCombine as the replacement for \p II.
Instruction *GCNTTIImpl::hoistReadLaneThroughOperand(InstCombiner &IC,
                                                     IntrinsicInst &II) const {
  auto *Op = dyn_cast<Instruction>(II.getOperand(0));

  // Only do this if both instructions are in the same block
  // (so the exec mask won't change) and the readlane is the only user of its
  // operand.
  if (!Op || !Op->hasOneUser() || Op->getParent() != II.getParent())
    return nullptr;

  const bool IsReadLane = (II.getIntrinsicID() == Intrinsic::amdgcn_readlane);

  // readlane takes an extra operand for the lane ID, so check that the lane ID
  // value is usable at the point of Op before moving the intrinsic higher.
  Value *LaneID = nullptr;
  if (IsReadLane) {
    LaneID = II.getOperand(1);

    // Only instructions need a position check; constants, arguments and other
    // non-instruction values are available everywhere in the function.
    // Use the dominator tree rather than Instruction::comesBefore: the latter
    // asserts that both instructions are in the same basic block, which is not
    // guaranteed for the lane ID.
    if (auto *LaneIDInst = dyn_cast<Instruction>(LaneID)) {
      if (!IC.getDominatorTree().dominates(LaneIDInst, Op))
        return nullptr;
    }
  }

  // Hoist the intrinsic through Op by applying it to operand OpIdx instead of
  // to Op's result.
  const auto DoIt = [&](unsigned OpIdx) -> Instruction * {
    SmallVector<Value *, 2> Ops{Op->getOperand(OpIdx)};
    if (IsReadLane)
      Ops.push_back(LaneID);

    // Re-create the intrinsic call on the selected operand.
    Instruction *NewII =
        IC.Builder.CreateIntrinsic(II.getType(), II.getIntrinsicID(), Ops);

    // Clone Op so that it consumes the result of the new intrinsic call.
    Instruction &NewOp = *Op->clone();
    NewOp.setOperand(OpIdx, NewII);
    return &NewOp;
  };

  // TODO: Are any operations more expensive on the SALU than VALU, and thus
  //       need to be excluded here?

  if (isa<UnaryOperator>(Op))
    return DoIt(0);

  if (isa<BinaryOperator>(Op)) {
    // FIXME: If we had access to UniformityInfo here we could just check
    // if the operand is uniform.
    if (isTriviallyUniform(Op->getOperandUse(0)))
      return DoIt(1);
    if (isTriviallyUniform(Op->getOperandUse(1)))
      return DoIt(0);
  }

  return nullptr;
}
536+
484537
std::optional<Instruction *>
485538
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
486539
Intrinsic::ID IID = II.getIntrinsicID();
@@ -1152,6 +1205,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11521205
}
11531206
}
11541207

1208+
// If the readfirstlane reads the result of an operation that exists
1209+
// both in the SALU and VALU, we may be able to hoist it higher in order
1210+
// to scalarize the expression.
1211+
if (Instruction *Res = hoistReadLaneThroughOperand(IC, II))
1212+
return Res;
1213+
11551214
return std::nullopt;
11561215
}
11571216
case Intrinsic::amdgcn_writelane: {

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
224224
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,
225225
unsigned LaneAgIdx) const;
226226

227+
Instruction *hoistReadLaneThroughOperand(InstCombiner &IC,
228+
IntrinsicInst &II) const;
229+
227230
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
228231
IntrinsicInst &II) const;
229232
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(

0 commit comments

Comments
 (0)