diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 935f21fd484f3..141af344f0e16 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -248,28 +248,43 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
     return OW_Unknown;
   if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
     return OW_Unknown;
-  if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
-    // Type size.
-    VectorType *KillingTy =
-        cast<VectorType>(KillingII->getArgOperand(0)->getType());
-    VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
-    if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
+
+  switch (KillingII->getIntrinsicID()) {
+  case Intrinsic::masked_store:
+  case Intrinsic::vp_store: {
+    const DataLayout &DL = KillingII->getDataLayout();
+    auto *KillingTy = KillingII->getArgOperand(0)->getType();
+    auto *DeadTy = DeadII->getArgOperand(0)->getType();
+    if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
       return OW_Unknown;
     // Element count.
-    if (KillingTy->getElementCount() != DeadTy->getElementCount())
+    if (cast<VectorType>(KillingTy)->getElementCount() !=
+        cast<VectorType>(DeadTy)->getElementCount())
       return OW_Unknown;
     // Pointers.
-    Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
-    Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+    Value *KillingPtr = KillingII->getArgOperand(1);
+    Value *DeadPtr = DeadII->getArgOperand(1);
     if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
       return OW_Unknown;
-    // Masks.
-    // TODO: check that KillingII's mask is a superset of the DeadII's mask.
-    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
-      return OW_Unknown;
+    if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
+      // Masks.
+      // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+      if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+        return OW_Unknown;
+    } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
+      // Masks.
+      // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+      if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
+        return OW_Unknown;
+      // Lengths.
+      if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+        return OW_Unknown;
+    }
     return OW_Complete;
   }
-  return OW_Unknown;
+  default:
+    return OW_Unknown;
+  }
 }
 
 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
new file mode 100644
index 0000000000000..7ba1354d8cd0b
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=dse -S < %s | FileCheck %s
+
+; Test predicated vector length masked stores for elimination
+
+define void @test1(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> [[V1:%.*]], <vscale x 8 x i32> [[V2:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[VP_OP]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl)
+  %vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> splat (i1 true), i32 %vl)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl)
+  ret void
+}
+
+; False test for different vector lengths
+
+define void @test2(ptr %a, i32 %vl1, i32 %vl2, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V1:%.*]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL1:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V2:%.*]], ptr nonnull [[A]], <vscale x 8 x i1> splat (i1 true), i32 [[VL2:%.*]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl1)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v2, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl2)
+  ret void
+}
+
+; False test for different types
+
+define void @test3(ptr %a, i32 %vl1, i32 %vl2, <vscale x 4 x i32> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[V1:%.*]], ptr nonnull [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[VL1:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V2:%.*]], ptr nonnull [[A]], <vscale x 8 x i1> splat (i1 true), i32 [[VL2:%.*]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %v1, ptr nonnull %a, <vscale x 4 x i1> splat (i1 true), i32 %vl1)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v2, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl2)
+  ret void
+}
+
+; False test for different element count
+
+define void @test4(ptr %a, i32 %vl, <vscale x 4 x i64> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> [[V1:%.*]], ptr nonnull [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[VL:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V2:%.*]], ptr nonnull [[A]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %v1, ptr nonnull %a, <vscale x 4 x i1> splat (i1 true), i32 %vl)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v2, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl)
+  ret void
+}
+
+; False test for different masks
+
+define void @test5(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> %m1, <vscale x 8 x i1> %m2) {
+;
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V1:%.*]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> [[M1:%.*]], i32 [[VL:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V2:%.*]], ptr nonnull [[A]], <vscale x 8 x i1> [[M2:%.*]], i32 [[VL]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> %m1, i32 %vl)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v2, ptr nonnull %a, <vscale x 8 x i1> %m2, i32 %vl)
+  ret void
+}
+
+; False test for different pointers
+
+define void @test6(ptr %a, ptr %b, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> %m1) {
+;
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V1:%.*]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> [[M1:%.*]], i32 [[VL:%.*]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[V2:%.*]], ptr nonnull [[B:%.*]], <vscale x 8 x i1> [[M1]], i32 [[VL]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> %m1, i32 %vl)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v2, ptr nonnull %b, <vscale x 8 x i1> %m1, i32 %vl)
+  ret void
+}
+
+declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr nocapture, <vscale x 8 x i1>, i32)
+declare void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32>, ptr nocapture, <vscale x 4 x i1>, i32)
+declare void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64>, ptr nocapture, <vscale x 4 x i1>, i32)
+