Skip to content

Commit f94083d

Browse files
committed
[AMDGPU][NFC] Refactor D16 folding for image samples with multiple ExtractElement+FPTrunc chains
1 parent 64fe323 commit f94083d

File tree

1 file changed

+69
-59
lines changed

1 file changed

+69
-59
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 69 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ struct AMDGPUImageDMaskIntrinsic {
3434
unsigned Intr;
3535
};
3636

37+
struct D16Candidate {
38+
SmallVector<Instruction *, 4> InstsToErase;
39+
Instruction *Replacee = nullptr;
40+
Value *Index = nullptr;
41+
};
42+
3743
#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
3844
#include "InstCombineTables.inc"
3945

@@ -150,6 +156,67 @@ static std::optional<Instruction *> modifyIntrinsicCall(
150156
return RetValue;
151157
}
152158

159+
/// Attempts to fold an image sample whose users are ExtractElement + FPTrunc
160+
/// chains into a D16-returning version.
161+
static std::optional<Instruction *>
162+
modifyImageIntrinsicForD16(IntrinsicInst &II,
163+
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
164+
InstCombiner &IC) {
165+
SmallVector<D16Candidate, 4> Candidates;
166+
167+
// Collect all (ExtractElement, FPTrunc) pairs; abort on the first mismatch
168+
for (User *U : II.users()) {
169+
auto *Ext = dyn_cast<ExtractElementInst>(U);
170+
if (!Ext || !Ext->hasOneUse())
171+
return std::nullopt;
172+
173+
auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
174+
if (!Tr || !Tr->getType()->getScalarType()->isHalfTy())
175+
return std::nullopt;
176+
177+
auto &Cand = Candidates.emplace_back();
178+
Cand.InstsToErase = {Tr, Ext};
179+
Cand.Replacee = Tr;
180+
Cand.Index = Ext->getIndexOperand();
181+
}
182+
183+
if (Candidates.empty())
184+
return std::nullopt;
185+
186+
// Build the new half-vector return type
187+
auto *VecTy = cast<VectorType>(II.getType());
188+
Type *HalfVecTy = VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
189+
190+
// Obtain the original image sample intrinsic's signature
191+
// and replace its return type with the half-vector for D16 folding
192+
SmallVector<Type *, 8> SigTys;
193+
Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
194+
SigTys[0] = HalfVecTy;
195+
196+
Function *HalfDecl = Intrinsic::getOrInsertDeclaration(
197+
II.getModule(), ImageDimIntr->Intr, SigTys);
198+
199+
II.mutateType(HalfVecTy);
200+
II.setCalledFunction(HalfDecl);
201+
202+
// Replace each chain with a single ExtractElement from the new D16 image
203+
IRBuilder<> B(II.getContext());
204+
for (auto &[Insts, Replacee, Idx] : Candidates) {
205+
B.SetInsertPoint(Replacee);
206+
auto *HalfExtract = B.CreateExtractElement(&II, Idx);
207+
HalfExtract->takeName(Replacee);
208+
Replacee->replaceAllUsesWith(HalfExtract);
209+
}
210+
211+
// Erase the old instructions
212+
for (auto &[Insts, Replacee, Idx] : Candidates) {
213+
for (auto *I : Insts)
214+
IC.eraseInstFromFunction(*I);
215+
}
216+
217+
return &II;
218+
}
219+
153220
static std::optional<Instruction *>
154221
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
155222
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
@@ -249,65 +316,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
249316
}
250317
}
251318

252-
// Only perform D16 folding if every user of the image sample is
253-
// an ExtractElementInst immediately followed by an FPTrunc to half.
254-
SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4>
255-
ExtractTruncPairs;
256-
bool AllHalfExtracts = true;
257-
258-
for (User *U : II.users()) {
259-
auto *Ext = dyn_cast<ExtractElementInst>(U);
260-
if (!Ext || !Ext->hasOneUse()) {
261-
AllHalfExtracts = false;
262-
break;
263-
}
264-
265-
auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
266-
if (!Tr || !Tr->getType()->isHalfTy()) {
267-
AllHalfExtracts = false;
268-
break;
269-
}
270-
271-
ExtractTruncPairs.emplace_back(Ext, Tr);
272-
}
273-
274-
if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
275-
auto *VecTy = cast<VectorType>(II.getType());
276-
Type *HalfVecTy =
277-
VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
278-
279-
// Obtain the original image sample intrinsic's signature
280-
// and replace its return type with the half-vector for D16 folding
281-
SmallVector<Type *, 8> SigTys;
282-
Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
283-
SigTys[0] = HalfVecTy;
284-
285-
Module *M = II.getModule();
286-
Function *HalfDecl =
287-
Intrinsic::getOrInsertDeclaration(M, ImageDimIntr->Intr, SigTys);
288-
289-
II.mutateType(HalfVecTy);
290-
II.setCalledFunction(HalfDecl);
291-
292-
IRBuilder<> Builder(II.getContext());
293-
for (auto &[Ext, Tr] : ExtractTruncPairs) {
294-
Value *Idx = Ext->getIndexOperand();
295-
296-
Builder.SetInsertPoint(Tr);
297-
298-
Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
299-
HalfExtract->takeName(Tr);
300-
301-
Tr->replaceAllUsesWith(HalfExtract);
302-
}
303-
304-
for (auto &[Ext, Tr] : ExtractTruncPairs) {
305-
IC.eraseInstFromFunction(*Tr);
306-
IC.eraseInstFromFunction(*Ext);
307-
}
308-
309-
return &II;
310-
}
319+
if (auto FoldedII = modifyImageIntrinsicForD16(II, ImageDimIntr, IC))
320+
return *FoldedII;
311321
}
312322
}
313323

0 commit comments

Comments
 (0)