@@ -34,6 +34,12 @@ struct AMDGPUImageDMaskIntrinsic {
3434 unsigned Intr;
3535};
3636
37+ struct D16Candidate {
38+ SmallVector<Instruction *, 4 > InstsToErase;
39+ Instruction *Replacee = nullptr ;
40+ Value *Index = nullptr ;
41+ };
42+
3743#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
3844#include " InstCombineTables.inc"
3945
@@ -150,6 +156,67 @@ static std::optional<Instruction *> modifyIntrinsicCall(
150156 return RetValue;
151157}
152158
159+ // / Attempts to fold an image sample whose users are ExtractElement + FPTrunc
160+ // / chains into a D16-returning version.
161+ static std::optional<Instruction *>
162+ modifyImageIntrinsicForD16 (IntrinsicInst &II,
163+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
164+ InstCombiner &IC) {
165+ SmallVector<D16Candidate, 4 > Candidates;
166+
167+ // Collect all (ExtractElement, FPTrunc) pairs; abort on the first mismatch
168+ for (User *U : II.users ()) {
169+ auto *Ext = dyn_cast<ExtractElementInst>(U);
170+ if (!Ext || !Ext->hasOneUse ())
171+ return std::nullopt ;
172+
173+ auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin ());
174+ if (!Tr || !Tr->getType ()->getScalarType ()->isHalfTy ())
175+ return std::nullopt ;
176+
177+ auto &Cand = Candidates.emplace_back ();
178+ Cand.InstsToErase = {Tr, Ext};
179+ Cand.Replacee = Tr;
180+ Cand.Index = Ext->getIndexOperand ();
181+ }
182+
183+ if (Candidates.empty ())
184+ return std::nullopt ;
185+
186+ // Build the new half-vector return type
187+ auto *VecTy = cast<VectorType>(II.getType ());
188+ Type *HalfVecTy = VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
189+
190+ // Obtain the original image sample intrinsic's signature
191+ // and replace its return type with the half-vector for D16 folding
192+ SmallVector<Type *, 8 > SigTys;
193+ Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
194+ SigTys[0 ] = HalfVecTy;
195+
196+ Function *HalfDecl = Intrinsic::getOrInsertDeclaration (
197+ II.getModule (), ImageDimIntr->Intr , SigTys);
198+
199+ II.mutateType (HalfVecTy);
200+ II.setCalledFunction (HalfDecl);
201+
202+ // Replace each chain with a single ExtractElement from the new D16 image
203+ IRBuilder<> B (II.getContext ());
204+ for (auto &[Insts, Replacee, Idx] : Candidates) {
205+ B.SetInsertPoint (Replacee);
206+ auto *HalfExtract = B.CreateExtractElement (&II, Idx);
207+ HalfExtract->takeName (Replacee);
208+ Replacee->replaceAllUsesWith (HalfExtract);
209+ }
210+
211+ // Erase the old instructions
212+ for (auto &[Insts, Replacee, Idx] : Candidates) {
213+ for (auto *I : Insts)
214+ IC.eraseInstFromFunction (*I);
215+ }
216+
217+ return &II;
218+ }
219+
153220static std::optional<Instruction *>
154221simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST,
155222 const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
@@ -249,65 +316,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
249316 }
250317 }
251318
252- // Only perform D16 folding if every user of the image sample is
253- // an ExtractElementInst immediately followed by an FPTrunc to half.
254- SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4 >
255- ExtractTruncPairs;
256- bool AllHalfExtracts = true ;
257-
258- for (User *U : II.users ()) {
259- auto *Ext = dyn_cast<ExtractElementInst>(U);
260- if (!Ext || !Ext->hasOneUse ()) {
261- AllHalfExtracts = false ;
262- break ;
263- }
264-
265- auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin ());
266- if (!Tr || !Tr->getType ()->isHalfTy ()) {
267- AllHalfExtracts = false ;
268- break ;
269- }
270-
271- ExtractTruncPairs.emplace_back (Ext, Tr);
272- }
273-
274- if (!ExtractTruncPairs.empty () && AllHalfExtracts) {
275- auto *VecTy = cast<VectorType>(II.getType ());
276- Type *HalfVecTy =
277- VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
278-
279- // Obtain the original image sample intrinsic's signature
280- // and replace its return type with the half-vector for D16 folding
281- SmallVector<Type *, 8 > SigTys;
282- Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
283- SigTys[0 ] = HalfVecTy;
284-
285- Module *M = II.getModule ();
286- Function *HalfDecl =
287- Intrinsic::getOrInsertDeclaration (M, ImageDimIntr->Intr , SigTys);
288-
289- II.mutateType (HalfVecTy);
290- II.setCalledFunction (HalfDecl);
291-
292- IRBuilder<> Builder (II.getContext ());
293- for (auto &[Ext, Tr] : ExtractTruncPairs) {
294- Value *Idx = Ext->getIndexOperand ();
295-
296- Builder.SetInsertPoint (Tr);
297-
298- Value *HalfExtract = Builder.CreateExtractElement (&II, Idx);
299- HalfExtract->takeName (Tr);
300-
301- Tr->replaceAllUsesWith (HalfExtract);
302- }
303-
304- for (auto &[Ext, Tr] : ExtractTruncPairs) {
305- IC.eraseInstFromFunction (*Tr);
306- IC.eraseInstFromFunction (*Ext);
307- }
308-
309- return &II;
310- }
319+ if (auto FoldedII = modifyImageIntrinsicForD16 (II, ImageDimIntr, IC))
320+ return *FoldedII;
311321 }
312322 }
313323
0 commit comments