From 3a0a0c2ac2cb7f548fa3d3c662a80ecf4527f596 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 10 Dec 2024 15:52:16 +0000 Subject: [PATCH] VT: don't create undef in isBytewiseValue The original motivation for this patch derives from a correctness issue found in MemCpyOpt's fca2memcpy.ll test by Alive2. The output from Alive2 is reproduced below: ---------------------------------------- define void @addrproducer(ptr %src, ptr %dst) { %#1 = load {ptr, i8, i24, i32}, ptr %src, align 8 store {ptr, i8, i24, i32} undef, ptr %dst, align 8 %dst2 = gep ptr %dst, 16 x i64 1 store {ptr, i8, i24, i32} %#1, ptr %dst2, align 8 ret void } => define void @addrproducer(ptr %src, ptr %dst) { %dst2 = gep ptr %dst, 16 x i64 1 memmove ptr %dst2 align 8, ptr %src align 8, i64 16 memset ptr %dst align 8, i8 undef, i64 16 ret void } Transformation doesn't verify! (unsound) ERROR: Mismatch in memory Example: ptr %src = pointer(non-local, block_id=1, offset=0) / Address=#x08 ptr %dst = pointer(non-local, block_id=1, offset=0) / Address=#x08 Source: {ptr, i8, i24, i32} %#1 = { poison, poison, poison, poison } ptr %dst2 = pointer(non-local, block_id=1, offset=16) / Address=#x18 SOURCE MEMORY STATE =================== NON-LOCAL BLOCKS: Block 0 > size: 0 align: 1 alloc type: 0 alive: false address: 0 Block 1 > size: 71 align: 1 alloc type: 0 alive: true address: 8 Block 2 > size: 0 align: 2 alloc type: 0 alive: true address: 4 Block 3 > size: 0 align: 1 alloc type: 0 alive: true address: 4 Target: ptr %dst2 = pointer(non-local, block_id=1, offset=16) / Address=#x18 Mismatch in pointer(non-local, block_id=1, offset=0) Source value: null, byte offset=0 Target value: #x01 ---------------------------------------- The underlying problem is in llvm::isBytewiseValue(), which creates and returns an UndefValue when called with an UndefValue, and this result is used by MemCpyOpt, leading to an incorrect optimization. Generally speaking, reasoning about undef values when optimizing is tricky, and considering that undef is scheduled for removal, change the function to bail out on undef values, handle poison instead, and create a poison value for all other purposes. Auding the callers of this function reveals that MemCpyOpt is the only caller that explicitly checks the return value of this function against undef: change this as well. This patch has the nice side-effect of cleaning up some undefs in the tests of MemCpyOpt and the unittests of ValueTracking. --- llvm/include/llvm/Analysis/ValueTracking.h | 3 +- llvm/lib/Analysis/ValueTracking.cpp | 22 +++++--- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 4 +- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 22 ++++---- .../Transforms/MemCpyOpt/memcpy-to-memset.ll | 18 +++--- .../store-to-memset-is-nonzero-type.ll | 4 +- llvm/unittests/Analysis/ValueTrackingTest.cpp | 56 +++++++++---------- 7 files changed, 66 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 8aa024a72afc8..d6d779ababaa8 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -617,8 +617,7 @@ inline std::optional computeKnownFPSignBit(const Value *V, unsigned Depth, /// return the i8 value that it is represented with. This is true for all i8 /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. -/// i16 0x1234), return null. If the value is entirely undef and padding, -/// return undef. +/// i16 0x1234), return null. If the value is undef, also return null. Value *isBytewiseValue(Value *V, const DataLayout &DL); /// Given an aggregate and an sequence of indices, see if the scalar value diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index f2c6949e535d2..ad71be4d82a01 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6163,14 +6163,18 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { LLVMContext &Ctx = V->getContext(); - // Undef don't care. - auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); + // Allow poison. + auto *Poison = PoisonValue::get(Type::getInt8Ty(Ctx)); + if (isa(V)) + return Poison; + + // Forbid optimization over undef, for correctness reasons. if (isa(V)) - return UndefInt8; + return nullptr; - // Return Undef for zero-sized type. + // Return poison for zero-sized type. if (DL.getTypeStoreSize(V->getType()).isZero()) - return UndefInt8; + return Poison; Constant *C = dyn_cast(V); if (!C) { @@ -6228,15 +6232,15 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { return LHS; if (!LHS || !RHS) return nullptr; - if (LHS == UndefInt8) + if (LHS == Poison) return RHS; - if (RHS == UndefInt8) + if (RHS == Poison) return LHS; return nullptr; }; if (ConstantDataSequential *CA = dyn_cast(C)) { - Value *Val = UndefInt8; + Value *Val = Poison; for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) return nullptr; @@ -6244,7 +6248,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { } if (isa(C)) { - Value *Val = UndefInt8; + Value *Val = Poison; for (Value *Op : C->operands()) if (!(Val = Merge(Val, isBytewiseValue(Op, DL)))) return nullptr; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bb98b3d1c0725..e02d50965b201 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -418,8 +418,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // Check to see if this stored value is of the same byte-splattable value. Value *StoredByte = isBytewiseValue(StoredVal, DL); - if (isa(ByteVal) && StoredByte) - ByteVal = StoredByte; + if (!StoredByte) + break; if (ByteVal != StoredByte) break; diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll index 61e349e01ed91..2903c626ce2fb 100644 --- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) { define void @destroynoaliassrc(ptr noalias %src, ptr %dst) { ; CHECK-LABEL: @destroynoaliassrc( -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret void ; %1 = load %S, ptr %src @@ -79,13 +79,13 @@ define void @copyalias(ptr %src, ptr %dst) { ; sure we lift the computation as well if needed and possible. define void @addrproducer(ptr %src, ptr %dst) { ; CHECK-LABEL: @addrproducer( -; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1 ; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 poison, i64 16, i1 false) ; CHECK-NEXT: ret void ; %1 = load %S, ptr %src - store %S undef, ptr %dst + store %S poison, ptr %dst %dst2 = getelementptr %S , ptr %dst, i64 1 store %S %1, ptr %dst2 ret void @@ -94,14 +94,14 @@ define void @addrproducer(ptr %src, ptr %dst) { define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) { ; CHECK-LABEL: @aliasaddrproducer( ; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 poison, i64 16, i1 false) ; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4 ; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S]], ptr [[DST]], i32 [[DSTINDEX]] ; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST2]], align 8 ; CHECK-NEXT: ret void ; %1 = load %S, ptr %src - store %S undef, ptr %dst + store %S poison, ptr %dst %dstindex = load i32, ptr %dstidptr %dst2 = getelementptr %S , ptr %dst, i32 %dstindex store %S %1, ptr %dst2 @@ -113,12 +113,12 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidp ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4 ; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1 ; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 poison, i64 16, i1 false) ; CHECK-NEXT: ret void ; %1 = load %S, ptr %src - store %S undef, ptr %src + store %S poison, ptr %src %2 = load i32, ptr %dstidptr %dstindex = or i32 %2, 1 %dst2 = getelementptr %S , ptr %dst, i32 %dstindex @@ -130,7 +130,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) { ; CHECK-LABEL: @throwing_call( ; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: call void @call() [[ATTR2:#.*]] +; CHECK-NEXT: call void @call() #[[ATTR2:[0-9]+]] ; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll index 1858f306db9f3..5852bccb4962e 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -3,15 +3,15 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind -@undef = internal constant i32 undef, align 4 -define void @test_undef() nounwind { -; CHECK-LABEL: @test_undef( +@poison = internal constant i32 poison, align 4 +define void @test_poison() nounwind { +; CHECK-LABEL: @test_poison( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 undef, i64 4, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 poison, i64 4, i1 false) ; CHECK-NEXT: ret void ; %a = alloca i32, align 4 - call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 @undef, i64 4, i1 false) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 @poison, i64 4, i1 false) ret void } @@ -27,15 +27,15 @@ define void @test_i32x3() nounwind { ret void } -@i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4 -define void @test_i32x3_undef() nounwind { -; CHECK-LABEL: @test_i32x3_undef( +@i32x3_poison = internal constant [3 x i32] [i32 -1, i32 poison, i32 -1], align 4 +define void @test_i32x3_poison() nounwind { +; CHECK-LABEL: @test_i32x3_poison( ; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 -1, i64 12, i1 false) ; CHECK-NEXT: ret void ; %a = alloca [3 x i32], align 4 - call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 @i32x3_undef, i64 12, i1 false) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 @i32x3_poison, i64 12, i1 false) ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll index 0455d65fe7521..6b53138342ebf 100644 --- a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll @@ -5,7 +5,7 @@ define void @array_zero(ptr %p) { ; CHECK-LABEL: @array_zero( -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P:%.*]], i8 undef, i64 0, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P:%.*]], i8 poison, i64 0, i1 false) ; CHECK-NEXT: ret void ; store [0 x i8] zeroinitializer, ptr %p @@ -25,7 +25,7 @@ define void @array_nonzero(ptr %p) { define void @struct_zero(ptr %p) { ; CHECK-LABEL: @struct_zero( -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P:%.*]], i8 undef, i64 0, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P:%.*]], i8 poison, i64 0, i1 false) ; CHECK-NEXT: ret void ; store { } zeroinitializer, ptr %p diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 0145ee70a14c1..f98f6b9a5531e 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2769,8 +2769,8 @@ const std::pair IsBytewiseValueTests[] = { "ptr null", }, { - "i8 undef", - "ptr undef", + "i8 poison", + "ptr poison", }, { "i8 0", @@ -2789,8 +2789,8 @@ const std::pair IsBytewiseValueTests[] = { "i8 -1", }, { - "i8 undef", - "i16 undef", + "i8 poison", + "i16 poison", }, { "i8 0", @@ -2869,28 +2869,28 @@ const std::pair IsBytewiseValueTests[] = { "ptr inttoptr (i96 -1 to ptr)", }, { - "i8 undef", + "i8 poison", "[0 x i8] zeroinitializer", }, { - "i8 undef", - "[0 x i8] undef", + "i8 poison", + "[0 x i8] poison", }, { - "i8 undef", + "i8 poison", "[5 x [0 x i8]] zeroinitializer", }, { - "i8 undef", - "[5 x [0 x i8]] undef", + "i8 poison", + "[5 x [0 x i8]] poison", }, { "i8 0", "[6 x i8] zeroinitializer", }, { - "i8 undef", - "[6 x i8] undef", + "i8 poison", + "[6 x i8] poison", }, { "i8 1", @@ -2910,15 +2910,15 @@ const std::pair IsBytewiseValueTests[] = { }, { "i8 1", - "[4 x i8] [i8 1, i8 undef, i8 1, i8 1]", + "[4 x i8] [i8 1, i8 poison, i8 1, i8 1]", }, { "i8 0", "<6 x i8> zeroinitializer", }, { - "i8 undef", - "<6 x i8> undef", + "i8 poison", + "<6 x i8> poison", }, { "i8 1", @@ -2938,15 +2938,15 @@ const std::pair IsBytewiseValueTests[] = { }, { "i8 5", - "<2 x i8> < i8 5, i8 undef >", + "<2 x i8> < i8 5, i8 poison >", }, { "i8 0", "[2 x [2 x i16]] zeroinitializer", }, { - "i8 undef", - "[2 x [2 x i16]] undef", + "i8 poison", + "[2 x [2 x i16]] poison", }, { "i8 -86", @@ -2959,36 +2959,36 @@ const std::pair IsBytewiseValueTests[] = { "[2 x i16] [i16 -21836, i16 -21846]]", }, { - "i8 undef", + "i8 poison", "{ } zeroinitializer", }, { - "i8 undef", - "{ } undef", + "i8 poison", + "{ } poison", }, { - "i8 undef", + "i8 poison", "{ {}, {} } zeroinitializer", }, { - "i8 undef", - "{ {}, {} } undef", + "i8 poison", + "{ {}, {} } poison", }, { "i8 0", "{i8, i64, ptr} zeroinitializer", }, { - "i8 undef", - "{i8, i64, ptr} undef", + "i8 poison", + "{i8, i64, ptr} poison", }, { "i8 -86", - "{i8, i64, ptr} {i8 -86, i64 -6148914691236517206, ptr undef}", + "{i8, i64, ptr} {i8 -86, i64 -6148914691236517206, ptr poison}", }, { "", - "{i8, i64, ptr} {i8 86, i64 -6148914691236517206, ptr undef}", + "{i8, i64, ptr} {i8 86, i64 -6148914691236517206, ptr poison}", }, };