@@ -34,6 +34,12 @@ struct AMDGPUImageDMaskIntrinsic {
34
34
unsigned Intr;
35
35
};
36
36
37
+ struct D16Candidate {
38
+ SmallVector<Instruction *, 4 > InstsToErase;
39
+ Instruction *Replacee = nullptr ;
40
+ Value *Index = nullptr ;
41
+ };
42
+
37
43
#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38
44
#include " InstCombineTables.inc"
39
45
@@ -150,6 +156,67 @@ static std::optional<Instruction *> modifyIntrinsicCall(
150
156
return RetValue;
151
157
}
152
158
159
+ // / Attempts to fold an image sample whose users are ExtractElement + FPTrunc
160
+ // / chains into a D16-returning version.
161
+ static std::optional<Instruction *>
162
+ modifyImageIntrinsicForD16 (IntrinsicInst &II,
163
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
164
+ InstCombiner &IC) {
165
+ SmallVector<D16Candidate, 4 > Candidates;
166
+
167
+ // Collect all (ExtractElement, FPTrunc) pairs; abort on the first mismatch
168
+ for (User *U : II.users ()) {
169
+ auto *Ext = dyn_cast<ExtractElementInst>(U);
170
+ if (!Ext || !Ext->hasOneUse ())
171
+ return std::nullopt;
172
+
173
+ auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin ());
174
+ if (!Tr || !Tr->getType ()->getScalarType ()->isHalfTy ())
175
+ return std::nullopt;
176
+
177
+ auto &Cand = Candidates.emplace_back ();
178
+ Cand.InstsToErase = {Tr, Ext};
179
+ Cand.Replacee = Tr;
180
+ Cand.Index = Ext->getIndexOperand ();
181
+ }
182
+
183
+ if (Candidates.empty ())
184
+ return std::nullopt;
185
+
186
+ // Build the new half-vector return type
187
+ auto *VecTy = cast<VectorType>(II.getType ());
188
+ Type *HalfVecTy = VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
189
+
190
+ // Obtain the original image sample intrinsic's signature
191
+ // and replace its return type with the half-vector for D16 folding
192
+ SmallVector<Type *, 8 > SigTys;
193
+ Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
194
+ SigTys[0 ] = HalfVecTy;
195
+
196
+ Function *HalfDecl = Intrinsic::getOrInsertDeclaration (
197
+ II.getModule (), ImageDimIntr->Intr , SigTys);
198
+
199
+ II.mutateType (HalfVecTy);
200
+ II.setCalledFunction (HalfDecl);
201
+
202
+ // Replace each chain with a single ExtractElement from the new D16 image
203
+ IRBuilder<> B (II.getContext ());
204
+ for (auto &[Insts, Replacee, Idx] : Candidates) {
205
+ B.SetInsertPoint (Replacee);
206
+ auto *HalfExtract = B.CreateExtractElement (&II, Idx);
207
+ HalfExtract->takeName (Replacee);
208
+ Replacee->replaceAllUsesWith (HalfExtract);
209
+ }
210
+
211
+ // Erase the old instructions
212
+ for (auto &[Insts, Replacee, Idx] : Candidates) {
213
+ for (auto *I : Insts)
214
+ IC.eraseInstFromFunction (*I);
215
+ }
216
+
217
+ return &II;
218
+ }
219
+
153
220
static std::optional<Instruction *>
154
221
simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST,
155
222
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
@@ -249,65 +316,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
249
316
}
250
317
}
251
318
252
- // Only perform D16 folding if every user of the image sample is
253
- // an ExtractElementInst immediately followed by an FPTrunc to half.
254
- SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4 >
255
- ExtractTruncPairs;
256
- bool AllHalfExtracts = true ;
257
-
258
- for (User *U : II.users ()) {
259
- auto *Ext = dyn_cast<ExtractElementInst>(U);
260
- if (!Ext || !Ext->hasOneUse ()) {
261
- AllHalfExtracts = false ;
262
- break ;
263
- }
264
-
265
- auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin ());
266
- if (!Tr || !Tr->getType ()->isHalfTy ()) {
267
- AllHalfExtracts = false ;
268
- break ;
269
- }
270
-
271
- ExtractTruncPairs.emplace_back (Ext, Tr);
272
- }
273
-
274
- if (!ExtractTruncPairs.empty () && AllHalfExtracts) {
275
- auto *VecTy = cast<VectorType>(II.getType ());
276
- Type *HalfVecTy =
277
- VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
278
-
279
- // Obtain the original image sample intrinsic's signature
280
- // and replace its return type with the half-vector for D16 folding
281
- SmallVector<Type *, 8 > SigTys;
282
- Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
283
- SigTys[0 ] = HalfVecTy;
284
-
285
- Module *M = II.getModule ();
286
- Function *HalfDecl =
287
- Intrinsic::getOrInsertDeclaration (M, ImageDimIntr->Intr , SigTys);
288
-
289
- II.mutateType (HalfVecTy);
290
- II.setCalledFunction (HalfDecl);
291
-
292
- IRBuilder<> Builder (II.getContext ());
293
- for (auto &[Ext, Tr] : ExtractTruncPairs) {
294
- Value *Idx = Ext->getIndexOperand ();
295
-
296
- Builder.SetInsertPoint (Tr);
297
-
298
- Value *HalfExtract = Builder.CreateExtractElement (&II, Idx);
299
- HalfExtract->takeName (Tr);
300
-
301
- Tr->replaceAllUsesWith (HalfExtract);
302
- }
303
-
304
- for (auto &[Ext, Tr] : ExtractTruncPairs) {
305
- IC.eraseInstFromFunction (*Tr);
306
- IC.eraseInstFromFunction (*Ext);
307
- }
308
-
309
- return &II;
310
- }
319
+ if (auto FoldedII = modifyImageIntrinsicForD16 (II, ImageDimIntr, IC))
320
+ return *FoldedII;
311
321
}
312
322
}
313
323
0 commit comments