Skip to content

Commit 37125e3

Browse files
committed
[CFG] Simplify gather/scatter splat pointer matching
The primary goal of this change is to simplify the code, but the result also ends up being slightly more powerful. Rather than repeating the gather/scatter-of-splat logic in both CodeGenPrepare (CGP) and SelectionDAG (SDAG), generalize the SDAG copy slightly and delete the CGP version. The X86 codegen diffs are improvements: we were scaling a zero value by 4, whereas now we no longer scale it. This codegen can likely be improved further, but that will come in upcoming patches.
1 parent a848916 commit 37125e3

File tree

7 files changed

+99
-122
lines changed

7 files changed

+99
-122
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 76 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -6230,7 +6230,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
62306230

62316231
/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
62326232
/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6233-
/// only handle a 2 operand GEP in the same basic block or a splat constant
6233+
/// only handle a 2 operand GEP in the same basic block or a canonical splat
62346234
/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
62356235
/// index.
62366236
///
@@ -6247,124 +6247,98 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
62476247
/// zero index.
62486248
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
62496249
Value *Ptr) {
6250-
Value *NewAddr;
6250+
const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
6251+
if (!GEP)
6252+
return false;
62516253

6252-
if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6253-
// Don't optimize GEPs that don't have indices.
6254-
if (!GEP->hasIndices())
6255-
return false;
6254+
// Don't optimize GEPs that don't have indices.
6255+
if (!GEP->hasIndices())
6256+
return false;
62566257

6257-
// If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6258-
// FIXME: We should support this by sinking the GEP.
6259-
if (MemoryInst->getParent() != GEP->getParent())
6260-
return false;
6258+
// If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6259+
// FIXME: We should support this by sinking the GEP.
6260+
if (MemoryInst->getParent() != GEP->getParent())
6261+
return false;
62616262

6262-
SmallVector<Value *, 2> Ops(GEP->operands());
6263+
SmallVector<Value *, 2> Ops(GEP->operands());
62636264

6264-
bool RewriteGEP = false;
6265+
bool RewriteGEP = false;
62656266

6266-
if (Ops[0]->getType()->isVectorTy()) {
6267-
Ops[0] = getSplatValue(Ops[0]);
6268-
if (!Ops[0])
6269-
return false;
6270-
RewriteGEP = true;
6271-
}
6267+
if (Ops[0]->getType()->isVectorTy()) {
6268+
Ops[0] = getSplatValue(Ops[0]);
6269+
if (!Ops[0])
6270+
return false;
6271+
RewriteGEP = true;
6272+
}
62726273

6273-
unsigned FinalIndex = Ops.size() - 1;
6274+
unsigned FinalIndex = Ops.size() - 1;
62746275

6275-
// Ensure all but the last index is 0.
6276-
// FIXME: This isn't strictly required. All that's required is that they are
6277-
// all scalars or splats.
6278-
for (unsigned i = 1; i < FinalIndex; ++i) {
6279-
auto *C = dyn_cast<Constant>(Ops[i]);
6280-
if (!C)
6281-
return false;
6282-
if (isa<VectorType>(C->getType()))
6283-
C = C->getSplatValue();
6284-
auto *CI = dyn_cast_or_null<ConstantInt>(C);
6285-
if (!CI || !CI->isZero())
6286-
return false;
6287-
// Scalarize the index if needed.
6288-
Ops[i] = CI;
6289-
}
6290-
6291-
// Try to scalarize the final index.
6292-
if (Ops[FinalIndex]->getType()->isVectorTy()) {
6293-
if (Value *V = getSplatValue(Ops[FinalIndex])) {
6294-
auto *C = dyn_cast<ConstantInt>(V);
6295-
// Don't scalarize all zeros vector.
6296-
if (!C || !C->isZero()) {
6297-
Ops[FinalIndex] = V;
6298-
RewriteGEP = true;
6299-
}
6276+
// Ensure all but the last index is 0.
6277+
// FIXME: This isn't strictly required. All that's required is that they are
6278+
// all scalars or splats.
6279+
for (unsigned i = 1; i < FinalIndex; ++i) {
6280+
auto *C = dyn_cast<Constant>(Ops[i]);
6281+
if (!C)
6282+
return false;
6283+
if (isa<VectorType>(C->getType()))
6284+
C = C->getSplatValue();
6285+
auto *CI = dyn_cast_or_null<ConstantInt>(C);
6286+
if (!CI || !CI->isZero())
6287+
return false;
6288+
// Scalarize the index if needed.
6289+
Ops[i] = CI;
6290+
}
6291+
6292+
// Try to scalarize the final index.
6293+
if (Ops[FinalIndex]->getType()->isVectorTy()) {
6294+
if (Value *V = getSplatValue(Ops[FinalIndex])) {
6295+
auto *C = dyn_cast<ConstantInt>(V);
6296+
// Don't scalarize all zeros vector.
6297+
if (!C || !C->isZero()) {
6298+
Ops[FinalIndex] = V;
6299+
RewriteGEP = true;
63006300
}
63016301
}
6302+
}
63026303

6303-
// If we made any changes or the we have extra operands, we need to generate
6304-
// new instructions.
6305-
if (!RewriteGEP && Ops.size() == 2)
6306-
return false;
6304+
// If we made any changes or we have extra operands, we need to generate
6305+
// new instructions.
6306+
if (!RewriteGEP && Ops.size() == 2)
6307+
return false;
63076308

6308-
auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6309+
auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
63096310

6310-
IRBuilder<> Builder(MemoryInst);
6311+
IRBuilder<> Builder(MemoryInst);
63116312

6312-
Type *SourceTy = GEP->getSourceElementType();
6313-
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6313+
Type *SourceTy = GEP->getSourceElementType();
6314+
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
63146315

6315-
// If the final index isn't a vector, emit a scalar GEP containing all ops
6316-
// and a vector GEP with all zeroes final index.
6317-
if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6318-
NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6319-
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6320-
auto *SecondTy = GetElementPtrInst::getIndexedType(
6321-
SourceTy, ArrayRef(Ops).drop_front());
6322-
NewAddr =
6323-
Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6324-
} else {
6325-
Value *Base = Ops[0];
6326-
Value *Index = Ops[FinalIndex];
6327-
6328-
// Create a scalar GEP if there are more than 2 operands.
6329-
if (Ops.size() != 2) {
6330-
// Replace the last index with 0.
6331-
Ops[FinalIndex] =
6332-
Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6333-
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6334-
SourceTy = GetElementPtrInst::getIndexedType(
6335-
SourceTy, ArrayRef(Ops).drop_front());
6336-
}
6316+
// If the final index isn't a vector, emit a scalar GEP containing all ops
6317+
// and a vector GEP with all zeroes final index.
6318+
Value *NewAddr;
6319+
if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6320+
NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6321+
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6322+
auto *SecondTy =
6323+
GetElementPtrInst::getIndexedType(SourceTy, ArrayRef(Ops).drop_front());
6324+
NewAddr =
6325+
Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6326+
} else {
6327+
Value *Base = Ops[0];
6328+
Value *Index = Ops[FinalIndex];
63376329

6338-
// Now create the GEP with scalar pointer and vector index.
6339-
NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6330+
// Create a scalar GEP if there are more than 2 operands.
6331+
if (Ops.size() != 2) {
6332+
// Replace the last index with 0.
6333+
Ops[FinalIndex] =
6334+
Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6335+
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6336+
SourceTy = GetElementPtrInst::getIndexedType(SourceTy,
6337+
ArrayRef(Ops).drop_front());
63406338
}
6341-
} else if (!isa<Constant>(Ptr)) {
6342-
// Not a GEP, maybe its a splat and we can create a GEP to enable
6343-
// SelectionDAGBuilder to use it as a uniform base.
6344-
Value *V = getSplatValue(Ptr);
6345-
if (!V)
6346-
return false;
6347-
6348-
auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
63496339

6350-
IRBuilder<> Builder(MemoryInst);
6351-
6352-
// Emit a vector GEP with a scalar pointer and all 0s vector index.
6353-
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6354-
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6355-
Type *ScalarTy;
6356-
if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6357-
Intrinsic::masked_gather) {
6358-
ScalarTy = MemoryInst->getType()->getScalarType();
6359-
} else {
6360-
assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6361-
Intrinsic::masked_scatter);
6362-
ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6363-
}
6364-
NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6365-
} else {
6366-
// Constant, SelectionDAGBuilder knows to check if its a splat.
6367-
return false;
6340+
// Now create the GEP with scalar pointer and vector index.
6341+
NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
63686342
}
63696343

63706344
MemoryInst->replaceUsesOfWith(Ptr, NewAddr);

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4888,14 +4888,9 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
48884888

48894889
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
48904890

4891-
// Handle splat constant pointer.
4892-
if (auto *C = dyn_cast<Constant>(Ptr)) {
4893-
C = C->getSplatValue();
4894-
if (!C)
4895-
return false;
4896-
4897-
Base = SDB->getValue(C);
4898-
4891+
// Handle splat (possibly constant) pointer.
4892+
if (Value *ScalarV = getSplatValue(Ptr)) {
4893+
Base = SDB->getValue(ScalarV);
48994894
ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
49004895
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
49014896
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4475,7 +4475,7 @@ define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru
44754475
; X64-SKX-NEXT: vpslld $31, %xmm0, %xmm0
44764476
; X64-SKX-NEXT: vpmovd2m %xmm0, %k1
44774477
; X64-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4478-
; X64-SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
4478+
; X64-SKX-NEXT: vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
44794479
; X64-SKX-NEXT: vmovdqa %xmm1, %xmm0
44804480
; X64-SKX-NEXT: retq
44814481
;
@@ -4485,7 +4485,7 @@ define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru
44854485
; X86-SKX-NEXT: vpmovd2m %xmm0, %k1
44864486
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
44874487
; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4488-
; X86-SKX-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
4488+
; X86-SKX-NEXT: vpgatherdd (%eax,%xmm0), %xmm1 {%k1}
44894489
; X86-SKX-NEXT: vmovdqa %xmm1, %xmm0
44904490
; X86-SKX-NEXT: retl
44914491
%1 = insertelement <4 x ptr> undef, ptr %ptr, i32 0
@@ -4581,7 +4581,7 @@ define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
45814581
; X64-SKX-NEXT: vpslld $31, %xmm0, %xmm0
45824582
; X64-SKX-NEXT: vpmovd2m %xmm0, %k1
45834583
; X64-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4584-
; X64-SKX-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
4584+
; X64-SKX-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0) {%k1}
45854585
; X64-SKX-NEXT: retq
45864586
;
45874587
; X86-SKX-LABEL: splat_ptr_scatter:
@@ -4590,7 +4590,7 @@ define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
45904590
; X86-SKX-NEXT: vpmovd2m %xmm0, %k1
45914591
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
45924592
; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
4593-
; X86-SKX-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
4593+
; X86-SKX-NEXT: vpscatterdd %xmm1, (%eax,%xmm0) {%k1}
45944594
; X86-SKX-NEXT: retl
45954595
%1 = insertelement <4 x ptr> undef, ptr %ptr, i32 0
45964596
%2 = shufflevector <4 x ptr> %1, <4 x ptr> undef, <4 x i32> zeroinitializer

llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
8585

8686
define <vscale x 4 x i32> @splat_ptr_gather(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
8787
; CHECK-LABEL: @splat_ptr_gather(
88-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
88+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[PTR:%.*]], i32 0
89+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <vscale x 4 x ptr> [[TMP3]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
8990
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
9091
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
9192
;
@@ -97,7 +98,8 @@ define <vscale x 4 x i32> @splat_ptr_gather(ptr %ptr, <vscale x 4 x i1> %mask, <
9798

9899
define void @splat_ptr_scatter(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
99100
; CHECK-LABEL: @splat_ptr_scatter(
100-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
101+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[PTR:%.*]], i32 0
102+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <vscale x 4 x ptr> [[TMP2]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
101103
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
102104
; CHECK-NEXT: ret void
103105
;

llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
8585

8686
define <vscale x 4 x i32> @splat_ptr_gather(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
8787
; CHECK-LABEL: @splat_ptr_gather(
88-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
88+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <vscale x 4 x ptr> undef, ptr [[PTR:%.*]], i32 0
89+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <vscale x 4 x ptr> [[TMP3]], <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
8990
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
9091
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
9192
;
@@ -97,7 +98,8 @@ define <vscale x 4 x i32> @splat_ptr_gather(ptr %ptr, <vscale x 4 x i1> %mask, <
9798

9899
define void @splat_ptr_scatter(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
99100
; CHECK-LABEL: @splat_ptr_scatter(
100-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
101+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x ptr> undef, ptr [[PTR:%.*]], i32 0
102+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <vscale x 4 x ptr> [[TMP2]], <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
101103
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
102104
; CHECK-NEXT: ret void
103105
;

llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ define <4 x i32> @global_struct_splat() {
8888

8989
define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
9090
; CHECK-LABEL: @splat_ptr_gather(
91-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
91+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[PTR:%.*]], i32 0
92+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
9293
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
9394
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
9495
;
@@ -100,7 +101,8 @@ define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru
100101

101102
define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
102103
; CHECK-LABEL: @splat_ptr_scatter(
103-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
104+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[PTR:%.*]], i32 0
105+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> zeroinitializer
104106
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL:%.*]], <4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
105107
; CHECK-NEXT: ret void
106108
;

llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ define <4 x i32> @global_struct_splat() {
8787

8888
define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
8989
; CHECK-LABEL: @splat_ptr_gather(
90-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
90+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> undef, ptr [[PTR:%.*]], i32 0
91+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> undef, <4 x i32> zeroinitializer
9192
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
9293
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
9394
;
@@ -99,7 +100,8 @@ define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru
99100

100101
define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
101102
; CHECK-LABEL: @splat_ptr_scatter(
102-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
103+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> undef, ptr [[PTR:%.*]], i32 0
104+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> undef, <4 x i32> zeroinitializer
103105
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL:%.*]], <4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
104106
; CHECK-NEXT: ret void
105107
;

0 commit comments

Comments
 (0)