Skip to content

Commit 3830e4e

Browse files
committed
AMDGPU: Create poison values instead of undef
These placeholders don't care about the finer points of the difference between undef and poison.
1 parent 239b52d commit 3830e4e

14 files changed

+196
-199
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
626626
if (NeedResult) {
627627
// Create a PHI node to get our new atomic result into the exit block.
628628
PHINode *const PHI = B.CreatePHI(Ty, 2);
629-
PHI->addIncoming(UndefValue::get(Ty), EntryBB);
629+
PHI->addIncoming(PoisonValue::get(Ty), EntryBB);
630630
PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
631631

632632
// We need to broadcast the value who was the lowest active lane (the first
@@ -643,7 +643,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
643643
CallInst *const ReadFirstLaneHi =
644644
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
645645
Value *const PartialInsert = B.CreateInsertElement(
646-
UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
646+
PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
647647
Value *const Insert =
648648
B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
649649
BroadcastI = B.CreateBitCast(Insert, Ty);
@@ -690,7 +690,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
690690
B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
691691

692692
PHINode *const PHI = B.CreatePHI(Ty, 2);
693-
PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
693+
PHI->addIncoming(PoisonValue::get(Ty), PixelEntryBB);
694694
PHI->addIncoming(Result, I.getParent());
695695
I.replaceAllUsesWith(PHI);
696696
} else {

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ static Value *insertValues(IRBuilder<> &Builder,
473473
return Values[0];
474474
}
475475

476-
Value *NewVal = UndefValue::get(Ty);
476+
Value *NewVal = PoisonValue::get(Ty);
477477
for (int I = 0, E = Values.size(); I != E; ++I)
478478
NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
479479

@@ -794,7 +794,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
794794

795795
Value *NewFDiv = nullptr;
796796
if (auto *VT = dyn_cast<FixedVectorType>(FDiv.getType())) {
797-
NewFDiv = UndefValue::get(VT);
797+
NewFDiv = PoisonValue::get(VT);
798798

799799
// FIXME: Doesn't do the right thing for cases where the vector is partially
800800
// constant. This works when the scalarizer pass is run first.
@@ -1260,7 +1260,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
12601260
Builder.SetCurrentDebugLocation(I.getDebugLoc());
12611261

12621262
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
1263-
NewDiv = UndefValue::get(VT);
1263+
NewDiv = PoisonValue::get(VT);
12641264

12651265
for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
12661266
Value *NumEltN = Builder.CreateExtractElement(Num, N);

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -911,12 +911,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
911911

912912
Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
913913
GlobalVariable *GV = new GlobalVariable(
914-
*Mod, GVTy, false, GlobalValue::InternalLinkage,
915-
UndefValue::get(GVTy),
916-
Twine(F->getName()) + Twine('.') + I.getName(),
917-
nullptr,
918-
GlobalVariable::NotThreadLocal,
919-
AMDGPUAS::LOCAL_ADDRESS);
914+
*Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
915+
Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
916+
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
920917
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
921918
GV->setAlignment(I.getAlign());
922919

llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
340340
B.SetCurrentDebugLocation(RI->getDebugLoc());
341341

342342
int RetIdx = 0;
343-
Value *NewRetVal = UndefValue::get(NewRetTy);
343+
Value *NewRetVal = PoisonValue::get(NewRetTy);
344344

345345
Value *RetVal = RI->getReturnValue();
346346
if (RetVal)
@@ -362,7 +362,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
362362
if (OutArgIndexes.count(Arg.getArgNo())) {
363363
// It's easier to preserve the type of the argument list. We rely on
364364
// DeadArgumentElimination to take care of these.
365-
StubCallArgs.push_back(UndefValue::get(Arg.getType()));
365+
StubCallArgs.push_back(PoisonValue::get(Arg.getType()));
366366
} else {
367367
StubCallArgs.push_back(&Arg);
368368
}

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
224224
DummyReturnBB = BasicBlock::Create(F.getContext(),
225225
"DummyReturnBlock", &F);
226226
Type *RetTy = F.getReturnType();
227-
Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
227+
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
228228
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
229229
ReturningBlocks.push_back(DummyReturnBB);
230230
}
@@ -286,7 +286,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
286286
// structurizer/annotator can't handle the multiple exits
287287

288288
Type *RetTy = F.getReturnType();
289-
Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
289+
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
290290
// Remove and delete the unreachable inst.
291291
UnreachableBlock->getTerminator()->eraseFromParent();
292292

llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
131131

132132
BoolTrue = ConstantInt::getTrue(Context);
133133
BoolFalse = ConstantInt::getFalse(Context);
134-
BoolUndef = UndefValue::get(Boolean);
134+
BoolUndef = PoisonValue::get(Boolean);
135135
IntMaskZero = ConstantInt::get(IntMask, 0);
136136

137137
If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
6464
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
6565
; IR-NEXT: br label [[TMP11]]
6666
; IR: 11:
67-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
67+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
6868
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
6969
; IR-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]]
7070
; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -158,7 +158,7 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
158158
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
159159
; IR-NEXT: br label [[TMP11]]
160160
; IR: 11:
161-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
161+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
162162
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
163163
; IR-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]]
164164
; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -255,7 +255,7 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
255255
; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
256256
; IR-NEXT: br label [[TMP12]]
257257
; IR: 12:
258-
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
258+
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
259259
; IR-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP13]])
260260
; IR-NEXT: [[TMP15:%.*]] = and i32 [[TMP5]], 1
261261
; IR-NEXT: [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]]

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
8282
; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
8383
; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
8484
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
85-
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
85+
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
8686
; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
8787
; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
8888
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
@@ -92,7 +92,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
9292
; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
9393
; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
9494
; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
95-
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
95+
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
9696
; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
9797
; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
9898
; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
@@ -102,7 +102,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
102102
; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
103103
; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
104104
; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
105-
; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
105+
; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[ONE_FDIV0]], i64 0
106106
; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
107107
; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
108108
; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
@@ -112,7 +112,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
112112
; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
113113
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
114114
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
115-
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
115+
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FDIV0]], i64 0
116116
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
117117
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
118118
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
@@ -136,47 +136,47 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
136136
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
137137
; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
138138
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
139-
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
139+
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
140140
; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
141141
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
142142
; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
143143
; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
144144

145145
; CHECK: %[[HALF0:[0-9]+]] = extractelement <2 x float> %x, i64 0
146146
; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
147-
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
147+
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
148148
; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
149149
; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF1]]
150150
; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
151151
; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
152152

153153
; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
154154
; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
155-
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
155+
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
156156
; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
157157
; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
158158
; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
159159
; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
160160

161161
; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
162162
; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
163-
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
163+
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
164164
; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
165165
; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
166166
; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
167167
; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
168168

169169
; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
170170
; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
171-
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
171+
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
172172
; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
173173
; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
174174
; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
175175
; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
176176

177177
; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
178178
; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
179-
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
179+
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
180180
; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
181181
; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
182182
; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
@@ -206,15 +206,15 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
206206
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
207207
; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
208208
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
209-
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
209+
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
210210
; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
211211
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
212212
; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
213213
; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
214214

215215
; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
216216
; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
217-
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
217+
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
218218
; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
219219
; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
220220
; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
@@ -223,7 +223,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
223223

224224
; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
225225
; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
226-
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
226+
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
227227
; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
228228
; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
229229
; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
@@ -232,7 +232,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
232232

233233
; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
234234
; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
235-
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
235+
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
236236
; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
237237
; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
238238
; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
@@ -241,7 +241,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
241241

242242
; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
243243
; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
244-
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
244+
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
245245
; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
246246
; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
247247
; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
@@ -271,7 +271,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
271271
; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
272272
; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
273273
; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
274-
; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
274+
; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_MUL0]], i64 0
275275
; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
276276
; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
277277
; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
@@ -283,7 +283,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
283283
; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
284284
; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
285285
; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
286-
; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
286+
; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_MUL0]], i64 0
287287
; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
288288
; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
289289
; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])

0 commit comments

Comments
 (0)