intel
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
Lines changed: 3 additions & 3 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Lines changed: 3 additions & 3 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
Lines changed: 3 additions & 6 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
Lines changed: 3 additions & 6 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
Lines changed: 2 additions & 2 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
Lines changed: 2 additions & 2 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
Lines changed: 3 additions & 3 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Lines changed: 17 additions & 17 deletions b/‎llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Lines changed: 17 additions & 17 deletions
@@ -626,7 +626,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   if (NeedResult) {
     // Create a PHI node to get our new atomic result into the exit block.
     PHINode *const PHI = B.CreatePHI(Ty, 2);
-    PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+    PHI->addIncoming(PoisonValue::get(Ty), EntryBB);
     PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
 
     // We need to broadcast the value who was the lowest active lane (the first
@@ -643,7 +643,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       CallInst *const ReadFirstLaneHi =
           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
       Value *const PartialInsert = B.CreateInsertElement(
-          UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+          PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
       Value *const Insert =
           B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
       BroadcastI = B.CreateBitCast(Insert, Ty);
@@ -690,7 +690,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
 
       PHINode *const PHI = B.CreatePHI(Ty, 2);
-      PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+      PHI->addIncoming(PoisonValue::get(Ty), PixelEntryBB);
       PHI->addIncoming(Result, I.getParent());
       I.replaceAllUsesWith(PHI);
     } else {
 
@@ -473,7 +473,7 @@ static Value *insertValues(IRBuilder<> &Builder,
     return Values[0];
   }
 
-  Value *NewVal = UndefValue::get(Ty);
+  Value *NewVal = PoisonValue::get(Ty);
   for (int I = 0, E = Values.size(); I != E; ++I)
     NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
 
@@ -794,7 +794,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
 
   Value *NewFDiv = nullptr;
   if (auto *VT = dyn_cast<FixedVectorType>(FDiv.getType())) {
-    NewFDiv = UndefValue::get(VT);
+    NewFDiv = PoisonValue::get(VT);
 
     // FIXME: Doesn't do the right thing for cases where the vector is partially
     // constant. This works when the scalarizer pass is run first.
@@ -1260,7 +1260,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
     Builder.SetCurrentDebugLocation(I.getDebugLoc());
 
     if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
-      NewDiv = UndefValue::get(VT);
+      NewDiv = PoisonValue::get(VT);
 
       for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
         Value *NumEltN = Builder.CreateExtractElement(Num, N);
 
@@ -911,12 +911,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
 
   Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
   GlobalVariable *GV = new GlobalVariable(
-      *Mod, GVTy, false, GlobalValue::InternalLinkage,
-      UndefValue::get(GVTy),
-      Twine(F->getName()) + Twine('.') + I.getName(),
-      nullptr,
-      GlobalVariable::NotThreadLocal,
-      AMDGPUAS::LOCAL_ADDRESS);
+      *Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
+      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
+      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
   GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
   GV->setAlignment(I.getAlign());
 
 
@@ -340,7 +340,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
     B.SetCurrentDebugLocation(RI->getDebugLoc());
 
     int RetIdx = 0;
-    Value *NewRetVal = UndefValue::get(NewRetTy);
+    Value *NewRetVal = PoisonValue::get(NewRetTy);
 
     Value *RetVal = RI->getReturnValue();
     if (RetVal)
@@ -362,7 +362,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
     if (OutArgIndexes.count(Arg.getArgNo())) {
       // It's easier to preserve the type of the argument list. We rely on
       // DeadArgumentElimination to take care of these.
-      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
+      StubCallArgs.push_back(PoisonValue::get(Arg.getType()));
     } else {
       StubCallArgs.push_back(&Arg);
     }
 
@@ -224,7 +224,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
         DummyReturnBB = BasicBlock::Create(F.getContext(),
                                            "DummyReturnBlock", &F);
         Type *RetTy = F.getReturnType();
-        Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+        Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
         ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
         ReturningBlocks.push_back(DummyReturnBB);
       }
@@ -286,7 +286,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
       // structurizer/annotator can't handle the multiple exits
 
       Type *RetTy = F.getReturnType();
-      Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+      Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
       // Remove and delete the unreachable inst.
       UnreachableBlock->getTerminator()->eraseFromParent();
 
 
@@ -131,7 +131,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
 
   BoolTrue = ConstantInt::getTrue(Context);
   BoolFalse = ConstantInt::getFalse(Context);
-  BoolUndef = UndefValue::get(Boolean);
+  BoolUndef = PoisonValue::get(Boolean);
   IntMaskZero = ConstantInt::get(IntMask, 0);
 
   If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
 
@@ -64,7 +64,7 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP11]]
 ; IR:       11:
-; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
+; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
 ; IR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
 ; IR-NEXT:    [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]]
 ; IR-NEXT:    call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -158,7 +158,7 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP11]]
 ; IR:       11:
-; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
+; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
 ; IR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
 ; IR-NEXT:    [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]]
 ; IR-NEXT:    call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -255,7 +255,7 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP12]]
 ; IR:       12:
-; IR-NEXT:    [[TMP13:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
+; IR-NEXT:    [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
 ; IR-NEXT:    [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP13]])
 ; IR-NEXT:    [[TMP15:%.*]] = and i32 [[TMP5]], 1
 ; IR-NEXT:    [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]]
 
@@ -82,7 +82,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
@@ -92,7 +92,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
-; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
+; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
 ; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
@@ -102,7 +102,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
-; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
+; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[ONE_FDIV0]], i64 0
 ; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
@@ -112,7 +112,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
-; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
+; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FDIV0]], i64 0
 ; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
@@ -136,47 +136,47 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
 ; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
 ; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[HALF0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
-; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
+; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
 ; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[HALF_FDIV1:[0-9]+]] =  fdiv float 1.000000e+00, %[[HALF1]]
 ; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
 ; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
-; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
+; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
 ; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
 ; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
-; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
+; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
 ; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
 ; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
-; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
+; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
 ; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
 ; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
-; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
+; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
 ; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
@@ -206,15 +206,15 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
 ; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
 ; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
-; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
+; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
 ; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
@@ -223,7 +223,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
-; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
+; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
 ; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
@@ -232,7 +232,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
-; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
+; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
 ; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
@@ -241,7 +241,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
-; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
+; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
 ; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
@@ -271,7 +271,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
 ; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
 ; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
 ; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
-; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
+; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_MUL0]], i64 0
 ; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
 ; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
 ; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
@@ -283,7 +283,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
 ; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
 ; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
 ; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
-; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
+; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_MUL0]], i64 0
 ; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
 ; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
 ; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])