@@ -275,56 +275,61 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
275275 // an ExtractElementInst immediately followed by an FPTrunc to half.
276276 SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4 >
277277 ExtractTruncPairs;
278+ bool AllHalfExtracts = true ;
278279
279280 for (User *U : II.users ()) {
280281 auto *Ext = dyn_cast<ExtractElementInst>(U);
281- if (!Ext || !Ext->hasOneUse ())
282- return std::nullopt ;
282+ if (!Ext || !Ext->hasOneUse ()) {
283+ AllHalfExtracts = false ;
284+ break ;
285+ }
283286
284287 auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin ());
285- if (!Tr || !Tr->getType ()->isHalfTy ())
286- return std::nullopt ;
288+ if (!Tr || !Tr->getType ()->isHalfTy ()) {
289+ AllHalfExtracts = false ;
290+ break ;
291+ }
287292
288293 ExtractTruncPairs.emplace_back (Ext, Tr);
289294 }
290295
291- if (ExtractTruncPairs.empty ())
292- return std::nullopt ;
296+ if (!ExtractTruncPairs.empty () && AllHalfExtracts) {
297+ auto *VecTy = cast<VectorType>(II.getType ());
298+ Type *HalfVecTy =
299+ VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
293300
294- auto *VecTy = cast<VectorType>(II.getType ());
295- Type *HalfVecTy = VecTy->getWithNewType (Type::getHalfTy (II.getContext ()));
301+ // Obtain the original image sample intrinsic's signature
302+ // and replace its return type with the half-vector for D16 folding
303+ SmallVector<Type *, 8 > SigTys;
304+ Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
305+ SigTys[0 ] = HalfVecTy;
296306
297- // Obtain the original image sample intrinsic's signature
298- // and replace its return type with the half-vector for D16 folding
299- SmallVector<Type *, 8 > SigTys;
300- Intrinsic::getIntrinsicSignature (II.getCalledFunction (), SigTys);
301- SigTys[0 ] = HalfVecTy;
307+ Module *M = II.getModule ();
308+ Function *HalfDecl =
309+ Intrinsic::getOrInsertDeclaration (M, ImageDimIntr->Intr , SigTys);
302310
303- Module *M = II.getModule ();
304- Function *HalfDecl =
305- Intrinsic::getOrInsertDeclaration (M, ImageDimIntr->Intr , SigTys);
311+ II.mutateType (HalfVecTy);
312+ II.setCalledFunction (HalfDecl);
306313
307- II.mutateType (HalfVecTy);
308- II.setCalledFunction (HalfDecl);
314+ IRBuilder<> Builder (II.getContext ());
315+ for (auto &[Ext, Tr] : ExtractTruncPairs) {
316+ Value *Idx = Ext->getIndexOperand ();
309317
310- IRBuilder<> Builder (II.getContext ());
311- for (auto &[Ext, Tr] : ExtractTruncPairs) {
312- Value *Idx = Ext->getIndexOperand ();
318+ Builder.SetInsertPoint (Tr);
313319
314- Builder.SetInsertPoint (Tr);
320+ Value *HalfExtract = Builder.CreateExtractElement (&II, Idx);
321+ HalfExtract->takeName (Tr);
315322
316- Value *HalfExtract = Builder. CreateExtractElement (&II, Idx );
317- HalfExtract-> takeName (Tr);
323+ Tr-> replaceAllUsesWith (HalfExtract );
324+ }
318325
319- Tr->replaceAllUsesWith (HalfExtract);
320- }
326+ for (auto &[Ext, Tr] : ExtractTruncPairs) {
327+ IC.eraseInstFromFunction (*Tr);
328+ IC.eraseInstFromFunction (*Ext);
329+ }
321330
322- for (auto &[Ext, Tr] : ExtractTruncPairs) {
323- IC.eraseInstFromFunction (*Tr);
324- IC.eraseInstFromFunction (*Ext);
331+ return &II;
325332 }
326-
327- return &II;
328333 }
329334 }
330335
0 commit comments